Add support for chroma intra prediction on Haswell
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
46 #define MFC_SOFTWARE_HASWELL    1
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
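/*
 * MFC_SOFTWARE_HASWELL selects the CPU-built PAK batchbuffer path further down
 * (#ifdef MFC_SOFTWARE_HASWELL); when it is not defined, the media kernels in
 * gen75_mfc_kernels build the batchbuffer on the GPU instead.
 * IS_STEPPING_BPLUS() dispatches to the *_bplus variants throughout this file,
 * which program the longer B0+ command layouts (e.g. a 26-dword
 * MFX_IND_OBJ_BASE_ADDR_STATE instead of 11 dwords, a 61-dword
 * MFX_PIPE_BUF_ADDR_STATE instead of 25).
 */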
50
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
53 };
54
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
57 };
58
59 static struct i965_kernel gen75_mfc_kernels[] = {
60     {
61         "MFC AVC INTRA BATCHBUFFER ",
62         MFC_BATCHBUFFER_AVC_INTRA,
63         gen75_mfc_batchbuffer_avc_intra,
64         sizeof(gen75_mfc_batchbuffer_avc_intra),
65         NULL
66     },
67
68     {
69         "MFC AVC INTER BATCHBUFFER ",
70         MFC_BATCHBUFFER_AVC_INTER,
71         gen75_mfc_batchbuffer_avc_inter,
72         sizeof(gen75_mfc_batchbuffer_avc_inter),
73         NULL
74     },
75 };
76
77 #define         INTER_MODE_MASK         0x03
78 #define         INTER_8X8               0x03
79 #define         SUBMB_SHAPE_MASK        0x00FF00
80
81 #define         INTER_MV8               (4 << 20)
82 #define         INTER_MV32              (6 << 20)
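/*
 * These masks decode the first two dwords of the per-MB VME output message in
 * gen75_mfc_avc_pak_object_inter(): an INTER_8X8 macroblock with a non-zero
 * sub-macroblock shape field (SUBMB_SHAPE_MASK) is emitted with the 32-MV
 * payload (INTER_MV32), otherwise with the 8-MV payload (INTER_MV8).
 */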
83
84
85 static void
86 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
87                           int standard_select,
88                           struct intel_encoder_context *encoder_context)
89 {
90     struct intel_batchbuffer *batch = encoder_context->base.batch;
91     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
92
93     assert(standard_select == MFX_FORMAT_MPEG2 ||
94            standard_select == MFX_FORMAT_AVC);
95
96     BEGIN_BCS_BATCH(batch, 5);
97
98     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
99     OUT_BCS_BATCH(batch,
100                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
101                   (MFD_MODE_VLD << 15) | /* VLD mode */
102                   (1 << 10) | /* Stream-Out Enable */
103                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
104                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
106                   (0 << 5)  | /* not in stitch mode */
107                   (1 << 4)  | /* encoding mode */
108                   (standard_select << 0));  /* standard select: avc or mpeg2 */
109     OUT_BCS_BATCH(batch,
110                   (0 << 7)  | /* expand NOA bus flag */
111                   (0 << 6)  | /* disable slice-level clock gating */
112                   (0 << 5)  | /* disable clock gating for NOA */
113                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
114                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
115                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
116                   (0 << 1)  |
117                   (0 << 0));
118     OUT_BCS_BATCH(batch, 0);
119     OUT_BCS_BATCH(batch, 0);
120
121     ADVANCE_BCS_BATCH(batch);
122 }
123
124 static void
125 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
126 {
127     struct intel_batchbuffer *batch = encoder_context->base.batch;
128     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
129
130     BEGIN_BCS_BATCH(batch, 6);
131
132     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
133     OUT_BCS_BATCH(batch, 0);
134     OUT_BCS_BATCH(batch,
135                   ((mfc_context->surface_state.height - 1) << 18) |
136                   ((mfc_context->surface_state.width - 1) << 4));
137     OUT_BCS_BATCH(batch,
138                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
139                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
140                   (0 << 22) | /* surface object control state, FIXME??? */
141                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
142                   (0 << 2)  | /* must be 0 for interleave U/V */
143                   (1 << 1)  | /* must be tiled */
144                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
145     OUT_BCS_BATCH(batch,
146                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
147                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
148     OUT_BCS_BATCH(batch, 0);
149
150     ADVANCE_BCS_BATCH(batch);
151 }
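/*
 * The surface programmed above is the planar 4:2:0 layout the MFX engine
 * requires (effectively NV12): interleaved U/V, Y-major tiling, pitch taken
 * from w_pitch, and DW4 giving the Y offset of the chroma (Cb) plane as
 * h_pitch.
 */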
152
153 static void
154 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
155                                 struct intel_encoder_context *encoder_context)
156 {
157     struct intel_batchbuffer *batch = encoder_context->base.batch;
158     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
160
161     BEGIN_BCS_BATCH(batch, 26);
162
163     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
164         /* the DW1-3 is for the MFX indirect bitstream offset */
165     OUT_BCS_BATCH(batch, 0);
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168         /* the DW4-5 is the MFX upper bound */
169     OUT_BCS_BATCH(batch, 0);
170     OUT_BCS_BATCH(batch, 0);
171
172     /* the DW6-10 is for MFX Indirect MV Object Base Address */
173     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
177     OUT_BCS_BATCH(batch, 0);
178
179      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185
186      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
194     OUT_BCS_RELOC(batch,
195                   mfc_context->mfc_indirect_pak_bse_object.bo,
196                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                   0);
198     OUT_BCS_BATCH(batch, 0);
199     OUT_BCS_BATCH(batch, 0);
200         
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
205     OUT_BCS_BATCH(batch, 0);
206
207     ADVANCE_BCS_BATCH(batch);
208 }
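/*
 * On B0+ steppings MFX_IND_OBJ_BASE_ADDR_STATE is 26 dwords: the bitstream
 * offset and upper bound are left at 0, the indirect MV object points at the
 * VME output bo, IT-COFF and DBLK are unused for encode, and the PAK-BSE
 * object is relocated twice, the second time with end_offset as what appears
 * to be its upper bound within the coded buffer.
 */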
209
210 static void
211 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
212 {
213     struct intel_batchbuffer *batch = encoder_context->base.batch;
214     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
215     struct gen6_vme_context *vme_context = encoder_context->vme_context;
216     struct i965_driver_data *i965 = i965_driver_data(ctx);
217
218     if (IS_STEPPING_BPLUS(i965)) {
219         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
220         return;
221     }
222     BEGIN_BCS_BATCH(batch, 11);
223
224     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
225     OUT_BCS_BATCH(batch, 0);
226     OUT_BCS_BATCH(batch, 0);
227     /* MFX Indirect MV Object Base Address */
228     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
229     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
230     OUT_BCS_BATCH(batch, 0);
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
235     OUT_BCS_RELOC(batch,
236                   mfc_context->mfc_indirect_pak_bse_object.bo,
237                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
238                   0);
239     OUT_BCS_RELOC(batch,
240                   mfc_context->mfc_indirect_pak_bse_object.bo,
241                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
242                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
243
244     ADVANCE_BCS_BATCH(batch);
245 }
246
247 static void
248 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
249                        struct intel_encoder_context *encoder_context)
250 {
251     struct intel_batchbuffer *batch = encoder_context->base.batch;
252     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
253     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
254
255     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
256     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
257
258     BEGIN_BCS_BATCH(batch, 16);
259
260     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
261     OUT_BCS_BATCH(batch,
262                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
263     OUT_BCS_BATCH(batch, 
264                   ((height_in_mbs - 1) << 16) | 
265                   ((width_in_mbs - 1) << 0));
266     OUT_BCS_BATCH(batch, 
267                   (0 << 24) |   /* Second Chroma QP Offset */
268                   (0 << 16) |   /* Chroma QP Offset */
269                   (0 << 14) |   /* Max-bit conformance Intra flag */
270                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
271                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
272                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
273                   (0 << 8)  |   /* FIXME: Image Structure */
274                   (0 << 0) );   /* Current Decoded Image Frame Store ID, reserved in Encode mode */
275     OUT_BCS_BATCH(batch,
276                   (0 << 16) |   /* Minimum Frame size */
277                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
278                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slice 1 frame */
279                   (0 << 13) |   /* CABAC 0 word insertion test enable */
280                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
281                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
282                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
283                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
284                   (0 << 6)  |   /* Only valid for VLD decoding mode */
285                   (0 << 5)  |   /* Constrained Intra Prediction Flag, from PPS */
286                   (0 << 4)  |   /* Direct 8x8 inference flag */
287                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
288                   (1 << 2)  |   /* Frame MB only flag */
289                   (0 << 1)  |   /* MBAFF mode is not active */
290                   (0 << 0));    /* Field picture flag */
291     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
292     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
293                   (0xBB8 << 16) |       /* InterMbMaxSz */
294                   (0xEE8) );            /* IntraMbMaxSz */
295     OUT_BCS_BATCH(batch, 0);            /* Reserved */
296     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
297     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
298     OUT_BCS_BATCH(batch, 0x8C000000);
299     OUT_BCS_BATCH(batch, 0x00010000);
300     OUT_BCS_BATCH(batch, 0);
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304
305     ADVANCE_BCS_BATCH(batch);
306 }
307
308 static void
309 gen75_mfc_qm_state(VADriverContextP ctx,
310                   int qm_type,
311                   unsigned int *qm,
312                   int qm_length,
313                   struct intel_encoder_context *encoder_context)
314 {
315     struct intel_batchbuffer *batch = encoder_context->base.batch;
316     unsigned int qm_buffer[16];
317
318     assert(qm_length <= 16);
319     assert(sizeof(*qm) == 4);
320     memcpy(qm_buffer, qm, qm_length * 4);
321
322     BEGIN_BCS_BATCH(batch, 18);
323     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
324     OUT_BCS_BATCH(batch, qm_type << 0);
325     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
326     ADVANCE_BCS_BATCH(batch);
327 }
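/*
 * The AVC QM/FQM defaults that follow are flat matrices: every quantizer
 * matrix byte is 16 (0x10101010 per dword) and every forward-QM entry is the
 * 16-bit value 0x1000, which looks like the matching fixed-point reciprocal.
 * The 4x4 matrices occupy 12 (QM) / 24 (FQM) dwords, the 8x8 matrices the full
 * 16 / 32 dwords of the command payload.
 */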
328
329 static void
330 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
331 {
332     unsigned int qm[16] = {
333         0x10101010, 0x10101010, 0x10101010, 0x10101010,
334         0x10101010, 0x10101010, 0x10101010, 0x10101010,
335         0x10101010, 0x10101010, 0x10101010, 0x10101010,
336         0x10101010, 0x10101010, 0x10101010, 0x10101010
337     };
338
339     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
340     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
341     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
342     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
343 }
344
345 static void
346 gen75_mfc_fqm_state(VADriverContextP ctx,
347                    int fqm_type,
348                    unsigned int *fqm,
349                    int fqm_length,
350                    struct intel_encoder_context *encoder_context)
351 {
352     struct intel_batchbuffer *batch = encoder_context->base.batch;
353     unsigned int fqm_buffer[32];
354
355     assert(fqm_length <= 32);
356     assert(sizeof(*fqm) == 4);
357     memcpy(fqm_buffer, fqm, fqm_length * 4);
358
359     BEGIN_BCS_BATCH(batch, 34);
360     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
361     OUT_BCS_BATCH(batch, fqm_type << 0);
362     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
363     ADVANCE_BCS_BATCH(batch);
364 }
365
366 static void
367 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
368 {
369     unsigned int qm[32] = {
370         0x10001000, 0x10001000, 0x10001000, 0x10001000,
371         0x10001000, 0x10001000, 0x10001000, 0x10001000,
372         0x10001000, 0x10001000, 0x10001000, 0x10001000,
373         0x10001000, 0x10001000, 0x10001000, 0x10001000,
374         0x10001000, 0x10001000, 0x10001000, 0x10001000,
375         0x10001000, 0x10001000, 0x10001000, 0x10001000,
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000
378     };
379
380     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
381     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
382     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
383     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
384 }
385
386 static void
387 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
388                            unsigned int *insert_data, int length_in_dws, int data_bits_in_last_dw,
389                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
390                            struct intel_batchbuffer *batch)
391 {
392     if (batch == NULL)
393         batch = encoder_context->base.batch;
394
395     BEGIN_BCS_BATCH(batch, length_in_dws + 2);
396
397     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
398     OUT_BCS_BATCH(batch,
399                   (0 << 16) |   /* always start at offset 0 */
400                   (data_bits_in_last_dw << 8) |
401                   (skip_emul_byte_count << 4) |
402                   (!!emulation_flag << 3) |
403                   ((!!is_last_header) << 2) |
404                   ((!!is_end_of_slice) << 1) |
405                   (0 << 0));    /* FIXME: ??? */
406     intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);
407
408     ADVANCE_BCS_BATCH(batch);
409 }
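/*
 * Typical use of the insert-object helper, taken from
 * gen75_mfc_avc_pipeline_slice_programing() below (insert_object is presumably
 * hooked up to this function): the packed slice header is emitted with
 * emulation-byte handling enabled and the first 5 bytes (start code + NAL unit
 * type) skipped,
 *
 *     mfc_context->insert_object(ctx, encoder_context,
 *                                (unsigned int *)slice_header,
 *                                ALIGN(slice_header_length_in_bits, 32) >> 5,
 *                                slice_header_length_in_bits & 0x1f,
 *                                5, 1, 0, 1, slice_batch);
 *
 * while the zero tail_data of the last slice is inserted with is_end_of_slice
 * set and emulation handling disabled.
 */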
410
411
412 static void gen75_mfc_init(VADriverContextP ctx,
413                         struct encode_state *encode_state,
414                         struct intel_encoder_context *encoder_context)
415 {
416     struct i965_driver_data *i965 = i965_driver_data(ctx);
417     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
418     dri_bo *bo;
419     int i;
420     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
421     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
422     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
423
424     /*Encode common setup for MFC*/
425     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
426     mfc_context->post_deblocking_output.bo = NULL;
427
428     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
429     mfc_context->pre_deblocking_output.bo = NULL;
430
431     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
432     mfc_context->uncompressed_picture_source.bo = NULL;
433
434     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
435     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
436
437     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
438         if (mfc_context->direct_mv_buffers[i].bo != NULL)
439             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
440         mfc_context->direct_mv_buffers[i].bo = NULL;
441     }
442
443     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
444         if (mfc_context->reference_surfaces[i].bo != NULL)
445             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
446         mfc_context->reference_surfaces[i].bo = NULL;  
447     }
448
449     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
450     bo = dri_bo_alloc(i965->intel.bufmgr,
451                       "Buffer",
452                       width_in_mbs * 64,
453                       64);
454     assert(bo);
455     mfc_context->intra_row_store_scratch_buffer.bo = bo;
456
457     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
458     bo = dri_bo_alloc(i965->intel.bufmgr,
459                       "Buffer",
460                       width_in_mbs * height_in_mbs * 16,
461                       64);
462     assert(bo);
463     mfc_context->macroblock_status_buffer.bo = bo;
464
465     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
466     bo = dri_bo_alloc(i965->intel.bufmgr,
467                       "Buffer",
468                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
469                       64);
470     assert(bo);
471     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
472
473     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
474     bo = dri_bo_alloc(i965->intel.bufmgr,
475                       "Buffer",
476                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
477                       0x1000);
478     assert(bo);
479     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
480
481     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
482     mfc_context->mfc_batchbuffer_surface.bo = NULL;
483
484     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
485     mfc_context->aux_batchbuffer_surface.bo = NULL;
486
487     if (mfc_context->aux_batchbuffer)
488         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
489
490     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
491     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
492     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
493     mfc_context->aux_batchbuffer_surface.pitch = 16;
494     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
495     mfc_context->aux_batchbuffer_surface.size_block = 16;
496
497     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
498 }
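/*
 * Buffer sizing used above: 64 bytes per MB column for the intra row store,
 * 16 bytes per MB for the macroblock status buffer, 4 * 64 bytes per MB column
 * for the deblocking filter row store and 2 * 64 bytes per MB column for the
 * BSD/MPC row store; the auxiliary batchbuffer is also exposed as a surface of
 * 16-byte blocks for the hardware batchbuffer (kernel) path.
 */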
499
500 static void
501 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
502                                 struct intel_encoder_context *encoder_context)
503 {
504     struct intel_batchbuffer *batch = encoder_context->base.batch;
505     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
506     int i;
507
508     BEGIN_BCS_BATCH(batch, 61);
509
510     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
511
512     /* the DW1-3 is for pre_deblocking */
513     if (mfc_context->pre_deblocking_output.bo)
514         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
515                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
516                       0);
517     else
518         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
519
520     OUT_BCS_BATCH(batch, 0);
521     OUT_BCS_BATCH(batch, 0);
522      /* the DW4-6 is for the post_deblocking */
523
524     if (mfc_context->post_deblocking_output.bo)
525         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
526                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
527                       0);                                                                                       /* post output addr  */ 
528     else
529         OUT_BCS_BATCH(batch, 0);
530     OUT_BCS_BATCH(batch, 0);
531     OUT_BCS_BATCH(batch, 0);
532
533      /* the DW7-9 is for the uncompressed_picture */
534     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
535                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
536                   0); /* uncompressed data */
537
538         OUT_BCS_BATCH(batch, 0);
539         OUT_BCS_BATCH(batch, 0);
540
541      /* the DW10-12 is for the mb status */
542     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
543                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
544                   0); /* StreamOut data*/
545         OUT_BCS_BATCH(batch, 0);
546         OUT_BCS_BATCH(batch, 0);
547
548      /* the DW13-15 is for the intra_row_store_scratch */
549     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
550                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
551                   0);   
552         OUT_BCS_BATCH(batch, 0);
553         OUT_BCS_BATCH(batch, 0);
554
555      /* the DW16-18 is for the deblocking filter */
556     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
557                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
558                   0);
559         OUT_BCS_BATCH(batch, 0);
560         OUT_BCS_BATCH(batch, 0);
561
562     /* the DW 19-50 is for Reference pictures*/
563     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
564         if ( mfc_context->reference_surfaces[i].bo != NULL) {
565             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
566                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
567                           0);                   
568         } else {
569             OUT_BCS_BATCH(batch, 0);
570         }
571         OUT_BCS_BATCH(batch, 0);
572     }
573         OUT_BCS_BATCH(batch, 0);
574
575         /* The DW 52-54 is for the MB status buffer */
576     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
577                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
578                   0);                                                                                   /* Macroblock status buffer*/
579         
580         OUT_BCS_BATCH(batch, 0);
581         OUT_BCS_BATCH(batch, 0);
582
583         /* the DW 55-57 is the ILDB buffer */
584         OUT_BCS_BATCH(batch, 0);
585         OUT_BCS_BATCH(batch, 0);
586         OUT_BCS_BATCH(batch, 0);
587
588         /* the DW 58-60 is the second ILDB buffer */
589         OUT_BCS_BATCH(batch, 0);
590         OUT_BCS_BATCH(batch, 0);
591         OUT_BCS_BATCH(batch, 0);
592     ADVANCE_BCS_BATCH(batch);
593 }
594
595 static void
596 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
597 {
598     struct intel_batchbuffer *batch = encoder_context->base.batch;
599     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
600     struct i965_driver_data *i965 = i965_driver_data(ctx);
601     int i;
602
603     if (IS_STEPPING_BPLUS(i965)) {
604         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
605         return;
606     }
607
608     BEGIN_BCS_BATCH(batch, 25);
609
610     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
611
612     if (mfc_context->pre_deblocking_output.bo)
613         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
614                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
615                       0);
616     else
617         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
618
619     if (mfc_context->post_deblocking_output.bo)
620         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
621                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
622                       0);                                                                                       /* post output addr  */ 
623     else
624         OUT_BCS_BATCH(batch, 0);
625
626     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
627                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
628                   0);                                                                                   /* uncompressed data */
629     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
630                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
631                   0);                                                                                   /* StreamOut data*/
632     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
633                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
634                   0);   
635     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
636                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
637                   0);
638     /* 7..22 Reference pictures*/
639     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
640         if ( mfc_context->reference_surfaces[i].bo != NULL) {
641             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
642                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
643                           0);                   
644         } else {
645             OUT_BCS_BATCH(batch, 0);
646         }
647     }
648     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
649                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
650                   0);                                                                                   /* Macroblock status buffer*/
651
652         OUT_BCS_BATCH(batch, 0);
653
654     ADVANCE_BCS_BATCH(batch);
655 }
656
657 static void
658 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
659                                 struct intel_encoder_context *encoder_context)
660 {
661     struct intel_batchbuffer *batch = encoder_context->base.batch;
662     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
663
664     int i;
665
666     BEGIN_BCS_BATCH(batch, 71);
667
668     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
669
670     /* Reference frames and Current frames */
671     /* the DW1-32 is for the direct MV for reference */
672     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
673         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
674             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
675                           I915_GEM_DOMAIN_INSTRUCTION, 0,
676                           0);
677             OUT_BCS_BATCH(batch, 0);
678         } else {
679             OUT_BCS_BATCH(batch, 0);
680             OUT_BCS_BATCH(batch, 0);
681         }
682     }
683         OUT_BCS_BATCH(batch, 0);
684
685         /* the DW34-36 is the MV for the current reference */
686         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
687                           I915_GEM_DOMAIN_INSTRUCTION, 0,
688                           0);
689
690         OUT_BCS_BATCH(batch, 0);
691         OUT_BCS_BATCH(batch, 0);
692
693     /* POC list */
694     for(i = 0; i < 32; i++) {
695         OUT_BCS_BATCH(batch, i/2);
696     }
697     OUT_BCS_BATCH(batch, 0);
698     OUT_BCS_BATCH(batch, 0);
699
700     ADVANCE_BCS_BATCH(batch);
701 }
702
703 static void
704 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
705 {
706     struct intel_batchbuffer *batch = encoder_context->base.batch;
707     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
708     struct i965_driver_data *i965 = i965_driver_data(ctx);
709     int i;
710
711     if (IS_STEPPING_BPLUS(i965)) {
712         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
713         return;
714     }
715
716     BEGIN_BCS_BATCH(batch, 69);
717
718     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
719
720     /* Reference frames and Current frames */
721     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
722         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
723             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
724                           I915_GEM_DOMAIN_INSTRUCTION, 0,
725                           0);
726         } else {
727             OUT_BCS_BATCH(batch, 0);
728         }
729     }
730
731     /* POC list */
732     for(i = 0; i < 32; i++) {
733         OUT_BCS_BATCH(batch, i/2);
734     }
735     OUT_BCS_BATCH(batch, 0);
736     OUT_BCS_BATCH(batch, 0);
737
738     ADVANCE_BCS_BATCH(batch);
739 }
740
741 static void
742 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
743 {
744     struct intel_batchbuffer *batch = encoder_context->base.batch;
745     int i;
746
747     BEGIN_BCS_BATCH(batch, 10);
748     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
749     OUT_BCS_BATCH(batch, 0);                  //Select L0
750     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
751     for(i = 0; i < 7; i++) {
752         OUT_BCS_BATCH(batch, 0x80808080);
753     }   
754     ADVANCE_BCS_BATCH(batch);
755
756     BEGIN_BCS_BATCH(batch, 10);
757     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
758     OUT_BCS_BATCH(batch, 1);                  //Select L1
759     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
760     for(i = 0; i < 7; i++) {
761         OUT_BCS_BATCH(batch, 0x80808080);
762     }   
763     ADVANCE_BCS_BATCH(batch);
764 }
765
766
767 static void
768 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
769                                 struct intel_encoder_context *encoder_context)
770 {
771     struct intel_batchbuffer *batch = encoder_context->base.batch;
772     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
773
774     BEGIN_BCS_BATCH(batch, 10);
775
776     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
777     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
778                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
779                   0);
780     OUT_BCS_BATCH(batch, 0);
781     OUT_BCS_BATCH(batch, 0);
782         
783         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787
788         /* the DW7-9 is for Bitplane Read Buffer Base Address */
789     OUT_BCS_BATCH(batch, 0);
790     OUT_BCS_BATCH(batch, 0);
791     OUT_BCS_BATCH(batch, 0);
792
793     ADVANCE_BCS_BATCH(batch);
794 }
795
796 static void
797 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
798 {
799     struct intel_batchbuffer *batch = encoder_context->base.batch;
800     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
801     struct i965_driver_data *i965 = i965_driver_data(ctx);
802
803     if (IS_STEPPING_BPLUS(i965)) {
804         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
805         return;
806     }
807
808     BEGIN_BCS_BATCH(batch, 4);
809
810     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
811     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
812                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
813                   0);
814     OUT_BCS_BATCH(batch, 0);
815     OUT_BCS_BATCH(batch, 0);
816
817     ADVANCE_BCS_BATCH(batch);
818 }
819
820
821 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
822                                       struct encode_state *encode_state,
823                                       struct intel_encoder_context *encoder_context)
824 {
825     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
826
827     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
828     mfc_context->set_surface_state(ctx, encoder_context);
829     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
830     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
831     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
832     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
833     mfc_context->avc_qm_state(ctx, encoder_context);
834     mfc_context->avc_fqm_state(ctx, encoder_context);
835     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
836     gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
837 }
838
839
840 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
841                              struct encode_state *encode_state,
842                              struct intel_encoder_context *encoder_context)
843 {
844     struct intel_batchbuffer *batch = encoder_context->base.batch;
845
846     intel_batchbuffer_flush(batch);             //run the pipeline
847
848     return VA_STATUS_SUCCESS;
849 }
850
851
852 static VAStatus
853 gen75_mfc_stop(VADriverContextP ctx, 
854               struct encode_state *encode_state,
855               struct intel_encoder_context *encoder_context,
856               int *encoded_bits_size)
857 {
858     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
859     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
860     VACodedBufferSegment *coded_buffer_segment;
861     
862     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
863     assert(vaStatus == VA_STATUS_SUCCESS);
864     *encoded_bits_size = coded_buffer_segment->size * 8;
865     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
866
867     return VA_STATUS_SUCCESS;
868 }
869
870
871 static void
872 gen75_mfc_avc_slice_state(VADriverContextP ctx,
873                          VAEncPictureParameterBufferH264 *pic_param,
874                          VAEncSliceParameterBufferH264 *slice_param,
875                          struct encode_state *encode_state,
876                          struct intel_encoder_context *encoder_context,
877                          int rate_control_enable,
878                          int qp,
879                          struct intel_batchbuffer *batch)
880 {
881     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
882     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
883     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
884     int beginmb = slice_param->macroblock_address;
885     int endmb = beginmb + slice_param->num_macroblocks;
886     int beginx = beginmb % width_in_mbs;
887     int beginy = beginmb / width_in_mbs;
888     int nextx =  endmb % width_in_mbs;
889     int nexty = endmb / width_in_mbs;
890     int slice_type = slice_param->slice_type;
891     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
892     int bit_rate_control_target, maxQpN, maxQpP;
893     unsigned char correct[6], grow, shrink;
894     int i;
895     int weighted_pred_idc = 0;
896     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
897     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
898
899     if (batch == NULL)
900         batch = encoder_context->base.batch;
901
902     bit_rate_control_target = slice_type;
903     if (slice_type == SLICE_TYPE_SP)
904         bit_rate_control_target = SLICE_TYPE_P;
905     else if (slice_type == SLICE_TYPE_SI)
906         bit_rate_control_target = SLICE_TYPE_I;
907
908     if (slice_type == SLICE_TYPE_P) {
909         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
910     } else if (slice_type == SLICE_TYPE_B) {
911         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
912
913         if (weighted_pred_idc == 2) {
914             /* 8.4.3 - Derivation process for prediction weights (8-279) */
915             luma_log2_weight_denom = 5;
916             chroma_log2_weight_denom = 5;
917         }
918     }
919
920     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
921     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
922
923     for (i = 0; i < 6; i++)
924         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
925
926     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
927         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
928     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
929         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
930
931     BEGIN_BCS_BATCH(batch, 11);
932
933     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
934     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
935
936     if (slice_type == SLICE_TYPE_I) {
937         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
938     } else {
939         OUT_BCS_BATCH(batch,
940                       (1 << 16) |                       /*1 reference frame*/
941                       (chroma_log2_weight_denom << 8) |
942                       (luma_log2_weight_denom << 0));
943     }
944
945     OUT_BCS_BATCH(batch, 
946                   (weighted_pred_idc << 30) |
947                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
948                   (slice_param->disable_deblocking_filter_idc << 27) |
949                   (slice_param->cabac_init_idc << 24) |
950                   (qp<<16) |                    /*Slice Quantization Parameter*/
951                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
952                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
953     OUT_BCS_BATCH(batch,
954                   (beginy << 24) |                      /*First MB X&Y, the begin position of the current slice*/
955                   (beginx << 16) |
956                   slice_param->macroblock_address );
957     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
958     OUT_BCS_BATCH(batch, 
959                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
960                   (1 << 30) |           /*ResetRateControlCounter*/
961                   (0 << 28) |           /*RC Trigger Mode = Always Rate Control*/
962                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
963                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
964                   (0 << 22) |     /*QP mode, don't modify CBP*/
965                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
966                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
967                   (last_slice << 19) |     /*IsLastSlice*/
968                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
969                   (1 << 17) |       /*HeaderPresentFlag*/       
970                   (1 << 16) |       /*SliceData PresentFlag*/
971                   (1 << 15) |       /*TailPresentFlag*/
972                   (1 << 13) |       /*RBSP NAL TYPE*/   
973                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
974     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
975     OUT_BCS_BATCH(batch,
976                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
977                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
978                   (shrink << 8)  |
979                   (grow << 0));   
980     OUT_BCS_BATCH(batch,
981                   (correct[5] << 20) |
982                   (correct[4] << 16) |
983                   (correct[3] << 12) |
984                   (correct[2] << 8) |
985                   (correct[1] << 4) |
986                   (correct[0] << 0));
987     OUT_BCS_BATCH(batch, 0);
988
989     ADVANCE_BCS_BATCH(batch);
990 }
991
992
993 #ifdef MFC_SOFTWARE_HASWELL
994
995 static int
996 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
997                                 int qp,unsigned int *msg,
998                               struct intel_encoder_context *encoder_context,
999                               unsigned char target_mb_size, unsigned char max_mb_size,
1000                               struct intel_batchbuffer *batch)
1001 {
1002     int len_in_dwords = 12;
1003     unsigned int intra_msg;
1004 #define         INTRA_MSG_FLAG          (1 << 13)
1005 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1006     if (batch == NULL)
1007         batch = encoder_context->base.batch;
1008
1009     BEGIN_BCS_BATCH(batch, len_in_dwords);
1010
1011     intra_msg = msg[0] & 0xC0FF;
1012     intra_msg |= INTRA_MSG_FLAG;
1013     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1014     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1015     OUT_BCS_BATCH(batch, 0);
1016     OUT_BCS_BATCH(batch, 0);
1017     OUT_BCS_BATCH(batch, 
1018                   (0 << 24) |           /* PackedMvNum, Debug*/
1019                   (0 << 20) |           /* No motion vector */
1020                   (1 << 19) |           /* CbpDcY */
1021                   (1 << 18) |           /* CbpDcU */
1022                   (1 << 17) |           /* CbpDcV */
1023                   intra_msg);
1024
1025     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1026     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1027     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1028
1029     /*Stuff for Intra MB*/
1030     OUT_BCS_BATCH(batch, msg[1]);                       /* We use Intra16x16, not the 4x4 pred mode */
1031     OUT_BCS_BATCH(batch, msg[2]);       
1032     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1033     
1034     /*MaxSizeInWord and TargetSizeInWord*/
1035     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1036                   (target_mb_size << 16) );
1037
1038     OUT_BCS_BATCH(batch, 0);
1039
1040     ADVANCE_BCS_BATCH(batch);
1041
1042     return len_in_dwords;
1043 }
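/*
 * intra_msg above repacks the VME output word: msg[0] is masked with 0xC0FF,
 * INTRA_MSG_FLAG (bit 13) marks the macroblock as intra, and the MB type field
 * (INTRA_MBTYPE_MASK, bits 16..20) is shifted down by 8 into the position the
 * PAK object expects; msg[1..3] carry the remaining intra payload (prediction
 * modes) produced by VME.
 */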
1044
1045 static int
1046 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1047                               unsigned int *msg, unsigned int offset,
1048                               struct intel_encoder_context *encoder_context,
1049                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1050                               struct intel_batchbuffer *batch)
1051 {
1052     int len_in_dwords = 12;
1053     unsigned int inter_msg = 0;
1054     if (batch == NULL)
1055         batch = encoder_context->base.batch;
1056
1057     BEGIN_BCS_BATCH(batch, len_in_dwords);
1058
1059     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1060
1061     inter_msg = 32;
1062     /* MV quantity */
1063     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1064         if (msg[1] & SUBMB_SHAPE_MASK)
1065             inter_msg = 128;
1066     }
1067     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1068     OUT_BCS_BATCH(batch, offset);
1069     inter_msg = msg[0] & (0x1F00FFFF);
1070     inter_msg |= INTER_MV8;
1071     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1072     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1073         (msg[1] & SUBMB_SHAPE_MASK)) {
1074         inter_msg |= INTER_MV32;
1075     }
1076
1077     OUT_BCS_BATCH(batch, inter_msg);
1078
1079     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1080     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1081 #if 0 
1082     if ( slice_type == SLICE_TYPE_B) {
1083         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1084     } else {
1085         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1086     }
1087 #else
1088     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1089 #endif
1090
1091     inter_msg = msg[1] >> 8;
1092     /*Stuff for Inter MB*/
1093     OUT_BCS_BATCH(batch, inter_msg);        
1094     OUT_BCS_BATCH(batch, 0x0);    
1095     OUT_BCS_BATCH(batch, 0x0);        
1096
1097     /*MaxSizeInWord and TargetSizeInWord*/
1098     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1099                   (target_mb_size << 16) );
1100
1101     OUT_BCS_BATCH(batch, 0x0);    
1102
1103     ADVANCE_BCS_BATCH(batch);
1104
1105     return len_in_dwords;
1106 }
1107
1108 #define         INTRA_RDO_OFFSET        4
1109 #define         INTER_RDO_OFFSET        54
1110 #define         INTER_MSG_OFFSET        52
1111 #define         INTER_MV_OFFSET         224
1112 #define         RDO_MASK                0xFFFF
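/*
 * Assumed layout of one VME output block, as consumed below: the intra RDO
 * cost sits at dword INTRA_RDO_OFFSET and the inter RDO cost at dword
 * INTER_RDO_OFFSET (low 16 bits each, via RDO_MASK); the inter PAK message
 * starts INTER_MSG_OFFSET dwords into the block, and INTER_MV_OFFSET is the
 * byte offset of the motion vectors that is passed to the PAK object as the
 * indirect MV data offset.
 */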
1113
1114 static void 
1115 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1116                                        struct encode_state *encode_state,
1117                                        struct intel_encoder_context *encoder_context,
1118                                        int slice_index,
1119                                        struct intel_batchbuffer *slice_batch)
1120 {
1121     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1122     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1123     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1124     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1125     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1126     unsigned int *msg = NULL, offset = 0;
1127     unsigned char *msg_ptr = NULL;
1128     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1129     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1130     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1131     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1132     int i,x,y;
1133     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1134     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1135     unsigned char *slice_header = NULL;
1136     int slice_header_length_in_bits = 0;
1137     unsigned int tail_data[] = { 0x0, 0x0 };
1138     int slice_type = pSliceParameter->slice_type;
1139
1140
1141     if (rate_control_mode == VA_RC_CBR) {
1142         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1143         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1144     }
1145
1146     /* only support for 8-bit pixel bit-depth */
1147     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1148     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1149     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1150     assert(qp >= 0 && qp < 52);
1151
1152     gen75_mfc_avc_slice_state(ctx, 
1153                              pPicParameter,
1154                              pSliceParameter,
1155                              encode_state, encoder_context,
1156                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1157
1158     if ( slice_index == 0) 
1159         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1160
1161     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1162
1163     // slice header
1164     mfc_context->insert_object(ctx, encoder_context,
1165                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1166                                5,  /* first 5 bytes are start code + nal unit type */
1167                                1, 0, 1, slice_batch);
1168
1169     dri_bo_map(vme_context->vme_output.bo , 1);
1170     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1171
1172     /* intra and inter slices both start reading the VME output at the same per-MB offset */
1173     msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1177    
1178     for (i = pSliceParameter->macroblock_address; 
1179          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1180         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1181         x = i % width_in_mbs;
1182         y = i / width_in_mbs;
1183         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1184
1185         if (is_intra) {
1186             assert(msg);
1187             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1188         } else {
1189             int inter_rdo, intra_rdo;
1190             inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
1191             intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
1192             offset = i * vme_context->vme_output.size_block + INTER_MV_OFFSET;
1193             if (intra_rdo < inter_rdo) { 
1194                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1195             } else {
1196                 msg += INTER_MSG_OFFSET;
1197                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1198             }
1199         }
1200     }
1201    
1202     dri_bo_unmap(vme_context->vme_output.bo);
1203
1204     if ( last_slice ) {    
1205         mfc_context->insert_object(ctx, encoder_context,
1206                                    tail_data, 2, 8,
1207                                    2, 1, 1, 0, slice_batch);
1208     } else {
1209         mfc_context->insert_object(ctx, encoder_context,
1210                                    tail_data, 1, 8,
1211                                    1, 1, 1, 0, slice_batch);
1212     }
1213
1214     free(slice_header);
1215
1216 }
1217
1218 static dri_bo *
1219 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1220                                   struct encode_state *encode_state,
1221                                   struct intel_encoder_context *encoder_context)
1222 {
1223     struct i965_driver_data *i965 = i965_driver_data(ctx);
1224     struct intel_batchbuffer *batch;
1225     dri_bo *batch_bo;
1226     int i;
1227     int buffer_size;
1228     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1229     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1230     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1231
1232     buffer_size = width_in_mbs * height_in_mbs * 64;
1233     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1234     batch_bo = batch->buffer;
1235     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1236         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1237     }
1238
1239     intel_batchbuffer_align(batch, 8);
1240     
1241     BEGIN_BCS_BATCH(batch, 2);
1242     OUT_BCS_BATCH(batch, 0);
1243     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1244     ADVANCE_BCS_BATCH(batch);
1245
1246     dri_bo_reference(batch_bo);
1247     intel_batchbuffer_free(batch);
1248
1249     return batch_bo;
1250 }
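/*
 * The software path above assembles every MFC_AVC_PAK_OBJECT on the CPU into a
 * BSD batchbuffer and returns the bo to the caller; the alternative path in
 * the #else branch below produces the same batchbuffer on the GPU using the
 * gen75_mfc_kernels media kernels and the GPE context set up in
 * gen75_mfc_init().
 */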
1251
1252 #else
1253
1254 static void
1255 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1256                                     struct encode_state *encode_state,
1257                                     struct intel_encoder_context *encoder_context)
1258
1259 {
1260     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1261     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1262
1263     assert(vme_context->vme_output.bo);
1264     mfc_context->buffer_suface_setup(ctx,
1265                                      &mfc_context->gpe_context,
1266                                      &vme_context->vme_output,
1267                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1268                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1269     assert(mfc_context->aux_batchbuffer_surface.bo);
1270     mfc_context->buffer_suface_setup(ctx,
1271                                      &mfc_context->gpe_context,
1272                                      &mfc_context->aux_batchbuffer_surface,
1273                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1274                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1275 }
1276
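/*
 * Allocate and bind the output surface the batchbuffer kernels write PAK
 * commands into: one block per macroblock plus 8 extra blocks per slice
 * and one trailing block, each block being CMD_LEN_IN_OWORD OWORDs with a
 * 16-byte pitch.  The extra per-slice blocks presumably leave room for the
 * slice head and tail data.
 */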
1277 static void
1278 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1279                                      struct encode_state *encode_state,
1280                                      struct intel_encoder_context *encoder_context)
1281
1282 {
1283     struct i965_driver_data *i965 = i965_driver_data(ctx);
1284     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1285     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1286     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1287     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1288     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1289     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1290     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1291     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1292                                                            "MFC batchbuffer",
1293                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1294                                                            0x1000);
1295     mfc_context->buffer_suface_setup(ctx,
1296                                      &mfc_context->gpe_context,
1297                                      &mfc_context->mfc_batchbuffer_surface,
1298                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1299                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1300 }
1301
1302 static void
1303 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1304                                     struct encode_state *encode_state,
1305                                     struct intel_encoder_context *encoder_context)
1306 {
1307     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1308     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1309 }
1310
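/*
 * Build the interface descriptor table: one 32-byte descriptor per loaded
 * kernel, each pointing at the kernel start address (patched via the
 * relocation below) and at the shared binding table, with two binding
 * table entries and a constant URB entry read length of 4.
 */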
1311 static void
1312 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1313                                 struct encode_state *encode_state,
1314                                 struct intel_encoder_context *encoder_context)
1315 {
1316     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1317     struct gen6_interface_descriptor_data *desc;   
1318     int i;
1319     dri_bo *bo;
1320
1321     bo = mfc_context->gpe_context.idrt.bo;
1322     dri_bo_map(bo, 1);
1323     assert(bo->virtual);
1324     desc = bo->virtual;
1325
1326     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1327         struct i965_kernel *kernel;
1328
1329         kernel = &mfc_context->gpe_context.kernels[i];
1330         assert(sizeof(*desc) == 32);
1331
1332         /* Set up the interface descriptor for this kernel */
1333         memset(desc, 0, sizeof(*desc));
1334         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1335         desc->desc2.sampler_count = 0;
1336         desc->desc2.sampler_state_pointer = 0;
1337         desc->desc3.binding_table_entry_count = 2;
1338         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1339         desc->desc4.constant_urb_entry_read_offset = 0;
1340         desc->desc4.constant_urb_entry_read_length = 4;
1341                 
1342         /* relocation for the kernel start pointer (desc0) */
1343         dri_bo_emit_reloc(bo,   
1344                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1345                           0,
1346                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1347                           kernel->bo);
1348         desc++;
1349     }
1350
1351     dri_bo_unmap(bo);
1352 }
1353
1354 static void
1355 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1356                                     struct encode_state *encode_state,
1357                                     struct intel_encoder_context *encoder_context)
1358 {
1359     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1360     
1361     (void)mfc_context;
1362 }
1363
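/*
 * Emit one MEDIA_OBJECT command that launches the selected batchbuffer
 * kernel.  The six inline-data DWORDs carry, in order: the head offset,
 * the destination offset in the MFC batchbuffer, the head/tail sizes, the
 * number of MB commands together with the first/last/last-slice flags, the
 * starting MB coordinates, and the QP packed with the frame width in MBs.
 */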
1364 static void
1365 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1366                                          int index,
1367                                          int head_offset,
1368                                          int batchbuffer_offset,
1369                                          int head_size,
1370                                          int tail_size,
1371                                          int number_mb_cmds,
1372                                          int first_object,
1373                                          int last_object,
1374                                          int last_slice,
1375                                          int mb_x,
1376                                          int mb_y,
1377                                          int width_in_mbs,
1378                                          int qp)
1379 {
1380     BEGIN_BATCH(batch, 12);
1381     
1382     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1383     OUT_BATCH(batch, index);
1384     OUT_BATCH(batch, 0);
1385     OUT_BATCH(batch, 0);
1386     OUT_BATCH(batch, 0);
1387     OUT_BATCH(batch, 0);
1388    
1389     /* inline data */
1390     OUT_BATCH(batch, head_offset);
1391     OUT_BATCH(batch, batchbuffer_offset);
1392     OUT_BATCH(batch, 
1393               head_size << 16 |
1394               tail_size);
1395     OUT_BATCH(batch,
1396               number_mb_cmds << 16 |
1397               first_object << 2 |
1398               last_object << 1 |
1399               last_slice);
1400     OUT_BATCH(batch,
1401               mb_y << 8 |
1402               mb_x);
1403     OUT_BATCH(batch,
1404               qp << 16 |
1405               width_in_mbs);
1406
1407     ADVANCE_BATCH(batch);
1408 }
1409
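/*
 * Split one slice into MEDIA_OBJECT launches of at most 128 macroblock
 * commands each.  The destination offset in the MFC batchbuffer is advanced
 * by head_size after the first launch and by tail_size after the last one,
 * leaving room for the slice head and tail the kernel is expected to copy,
 * plus CMD_LEN_IN_OWORD OWORDs per macroblock command.
 */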
1410 static void
1411 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1412                                        struct intel_encoder_context *encoder_context,
1413                                        VAEncSliceParameterBufferH264 *slice_param,
1414                                        int head_offset,
1415                                        unsigned short head_size,
1416                                        unsigned short tail_size,
1417                                        int batchbuffer_offset,
1418                                        int qp,
1419                                        int last_slice)
1420 {
1421     struct intel_batchbuffer *batch = encoder_context->base.batch;
1422     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1423     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1424     int total_mbs = slice_param->num_macroblocks;
1425     int number_mb_cmds = 128;
1426     int starting_mb = 0;
1427     int last_object = 0;
1428     int first_object = 1;
1429     int i;
1430     int mb_x, mb_y;
1431     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1432
1433     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1434         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1435         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1436         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1437         assert(mb_x <= 255 && mb_y <= 255);
1438
1439         starting_mb += number_mb_cmds;
1440
1441         gen75_mfc_batchbuffer_emit_object_command(batch,
1442                                                  index,
1443                                                  head_offset,
1444                                                  batchbuffer_offset,
1445                                                  head_size,
1446                                                  tail_size,
1447                                                  number_mb_cmds,
1448                                                  first_object,
1449                                                  last_object,
1450                                                  last_slice,
1451                                                  mb_x,
1452                                                  mb_y,
1453                                                  width_in_mbs,
1454                                                  qp);
1455
1456         if (first_object) {
1457             head_offset += head_size;
1458             batchbuffer_offset += head_size;
1459         }
1460
1461         if (last_object) {
1462             head_offset += tail_size;
1463             batchbuffer_offset += tail_size;
1464         }
1465
1466         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1467
1468         first_object = 0;
1469     }
1470
1471     if (!last_object) {
1472         last_object = 1;
1473         number_mb_cmds = total_mbs % number_mb_cmds;
1474         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1475         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1476         assert(mb_x <= 255 && mb_y <= 255);
1477         starting_mb += number_mb_cmds;
1478
1479         gen75_mfc_batchbuffer_emit_object_command(batch,
1480                                                  index,
1481                                                  head_offset,
1482                                                  batchbuffer_offset,
1483                                                  head_size,
1484                                                  tail_size,
1485                                                  number_mb_cmds,
1486                                                  first_object,
1487                                                  last_object,
1488                                                  last_slice,
1489                                                  mb_x,
1490                                                  mb_y,
1491                                                  width_in_mbs,
1492                                                  qp);
1493     }
1494 }
1495                           
1496 /*
1497  * Returns the size consumed by this slice in the MFC batchbuffer, in OWORDs (16 bytes each).
1498  */
1499 static int
1500 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1501                                struct encode_state *encode_state,
1502                                struct intel_encoder_context *encoder_context,
1503                                int slice_index,
1504                                int batchbuffer_offset)
1505 {
1506     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1507     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1508     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1509     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1510     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1511     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1512     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1513     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1514     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1515     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1516     unsigned char *slice_header = NULL;
1517     int slice_header_length_in_bits = 0;
1518     unsigned int tail_data[] = { 0x0, 0x0 };
1519     long head_offset;
1520     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1521     unsigned short head_size, tail_size;
1522     int slice_type = pSliceParameter->slice_type;
1523
1524     if (rate_control_mode == VA_RC_CBR) {
1525         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1526         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1527     }
1528
1529     /* only 8-bit pixel bit depth is supported */
1530     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1531     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1532     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1533     assert(qp >= 0 && qp < 52);
1534
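    /* offsets and sizes within the aux (slice) batchbuffer are tracked in OWORDs (16 bytes) */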
1535     head_offset = old_used / 16;
1536     gen75_mfc_avc_slice_state(ctx,
1537                              pPicParameter,
1538                              pSliceParameter,
1539                              encode_state,
1540                              encoder_context,
1541                              (rate_control_mode == VA_RC_CBR),
1542                              qp,
1543                              slice_batch);
1544
1545     if (slice_index == 0)
1546         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1547
1548     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1549
1550     // slice header
1551     mfc_context->insert_object(ctx,
1552                                encoder_context,
1553                                (unsigned int *)slice_header,
1554                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1555                                slice_header_length_in_bits & 0x1f,
1556                                5,  /* first 5 bytes are start code + nal unit type */
1557                                1,
1558                                0,
1559                                1,
1560                                slice_batch);
1561     free(slice_header);
1562
1563     intel_batchbuffer_align(slice_batch, 16); /* align to an OWORD boundary */
1564     used = intel_batchbuffer_used_size(slice_batch);
1565     head_size = (used - old_used) / 16;
1566     old_used = used;
1567
1568     /* tail */
1569     if (last_slice) {    
1570         mfc_context->insert_object(ctx,
1571                                    encoder_context,
1572                                    tail_data,
1573                                    2,
1574                                    8,
1575                                    2,
1576                                    1,
1577                                    1,
1578                                    0,
1579                                    slice_batch);
1580     } else {
1581         mfc_context->insert_object(ctx,
1582                                    encoder_context,
1583                                    tail_data,
1584                                    1,
1585                                    8,
1586                                    1,
1587                                    1,
1588                                    1,
1589                                    0,
1590                                    slice_batch);
1591     }
1592
1593     intel_batchbuffer_align(slice_batch, 16); /* align to an OWORD boundary */
1594     used = intel_batchbuffer_used_size(slice_batch);
1595     tail_size = (used - old_used) / 16;
1596
1597    
1598     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1599                                            encoder_context,
1600                                            pSliceParameter,
1601                                            head_offset,
1602                                            head_size,
1603                                            tail_size,
1604                                            batchbuffer_offset,
1605                                            qp,
1606                                            last_slice);
1607
1608     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1609 }
1610
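/*
 * Drive the GPGPU pipeline that fills the MFC batchbuffer: set up the GPE
 * state, then emit the MEDIA_OBJECT commands for each slice, accumulating
 * every slice's size in OWORDs as the batchbuffer offset of the next one.
 */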
1611 static void
1612 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1613                                   struct encode_state *encode_state,
1614                                   struct intel_encoder_context *encoder_context)
1615 {
1616     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1617     struct intel_batchbuffer *batch = encoder_context->base.batch;
1618     int i, size, offset = 0;
1619     intel_batchbuffer_start_atomic(batch, 0x4000); 
1620     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1621
1622     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1623         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1624         offset += size;
1625     }
1626
1627     intel_batchbuffer_end_atomic(batch);
1628     intel_batchbuffer_flush(batch);
1629 }
1630
1631 static void
1632 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1633                                struct encode_state *encode_state,
1634                                struct intel_encoder_context *encoder_context)
1635 {
1636     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1637     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1638     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1639     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1640 }
1641
1642 static dri_bo *
1643 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1644                                   struct encode_state *encode_state,
1645                                   struct intel_encoder_context *encoder_context)
1646 {
1647     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1648
1649     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1650     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1651
1652     return mfc_context->mfc_batchbuffer_surface.bo;
1653 }
1654
1655 #endif
1656
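/*
 * Top-level BCS programming for one frame: build the per-slice batch buffer
 * (software or hardware path, selected by MFC_SOFTWARE_HASWELL), emit the
 * picture-level MFX state, then jump into the slice batch buffer via
 * MI_BATCH_BUFFER_START.
 */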
1657 static void
1658 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1659                                  struct encode_state *encode_state,
1660                                  struct intel_encoder_context *encoder_context)
1661 {
1662     struct intel_batchbuffer *batch = encoder_context->base.batch;
1663     dri_bo *slice_batch_bo;
1664
1665     if (intel_mfc_interlace_check(ctx, encode_state, encoder_context)) {
1666         fprintf(stderr, "The current VA driver doesn't support interlaced mode!\n");
1667         assert(0);
1668         return;
1669     }
1670
1671 #ifdef MFC_SOFTWARE_HASWELL
1672     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1673 #else
1674     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1675 #endif
1676
1677     // begin programming
1678     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1679     intel_batchbuffer_emit_mi_flush(batch);
1680     
1681     // picture level programming
1682     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1683
1684     BEGIN_BCS_BATCH(batch, 2);
1685     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1686     OUT_BCS_RELOC(batch,
1687                   slice_batch_bo,
1688                   I915_GEM_DOMAIN_COMMAND, 0, 
1689                   0);
1690     ADVANCE_BCS_BATCH(batch);
1691
1692     // end programming
1693     intel_batchbuffer_end_atomic(batch);
1694
1695     dri_bo_unreference(slice_batch_bo);
1696 }
1697
1698
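/*
 * Encode one picture.  Under CBR the frame is re-encoded in a loop until
 * the bit-rate controller reports no HRD violation; an overflow at the
 * minimum QP or an underflow at the maximum QP is reported once and then
 * accepted.  Other rate-control modes encode the frame exactly once.
 */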
1699 static VAStatus
1700 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1701                             struct encode_state *encode_state,
1702                             struct intel_encoder_context *encoder_context)
1703 {
1704     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1705     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1706     int current_frame_bits_size;
1707     int sts;
1708  
1709     for (;;) {
1710         gen75_mfc_init(ctx, encode_state, encoder_context);
1711         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1712         /* Programming the BCS pipeline */
1713         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  // fill the pipeline
1714         gen75_mfc_run(ctx, encode_state, encoder_context);
1715         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1716             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1717             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1718             if (sts == BRC_NO_HRD_VIOLATION) {
1719                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1720                 break;
1721             }
1722             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1723                 if (!mfc_context->hrd.violation_noted) {
1724                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1725                     mfc_context->hrd.violation_noted = 1;
1726                 }
1727                 return VA_STATUS_SUCCESS;
1728             }
1729         } else {
1730             break;
1731         }
1732     }
1733
1734     return VA_STATUS_SUCCESS;
1735 }
1736
1737
1738 static void
1739 gen75_mfc_context_destroy(void *context)
1740 {
1741     struct gen6_mfc_context *mfc_context = context;
1742     int i;
1743
1744     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1745     mfc_context->post_deblocking_output.bo = NULL;
1746
1747     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1748     mfc_context->pre_deblocking_output.bo = NULL;
1749
1750     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1751     mfc_context->uncompressed_picture_source.bo = NULL;
1752
1753     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1754     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1755
1756     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1757         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1758         mfc_context->direct_mv_buffers[i].bo = NULL;
1759     }
1760
1761     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1762     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1763
1764     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1765     mfc_context->macroblock_status_buffer.bo = NULL;
1766
1767     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1768     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1769
1770     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1771     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1772
1773
1774     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1775         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1776         mfc_context->reference_surfaces[i].bo = NULL;  
1777     }
1778
1779     i965_gpe_context_destroy(&mfc_context->gpe_context);
1780
1781     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
1782     mfc_context->mfc_batchbuffer_surface.bo = NULL;
1783
1784     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1785     mfc_context->aux_batchbuffer_surface.bo = NULL;
1786
1787     if (mfc_context->aux_batchbuffer)
1788         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1789
1790     mfc_context->aux_batchbuffer = NULL;
1791
1792     free(mfc_context);
1793 }
1794
1795 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
1796                   VAProfile profile,
1797                   struct encode_state *encode_state,
1798                   struct intel_encoder_context *encoder_context)
1799 {
1800     VAStatus vaStatus;
1801
1802     switch (profile) {
1803     case VAProfileH264Baseline:
1804     case VAProfileH264Main:
1805     case VAProfileH264High:
1806         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1807         break;
1808
1809         /* FIXME: add support for other profiles */
1810     default:
1811         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1812         break;
1813     }
1814
1815     return vaStatus;
1816 }
1817
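/*
 * Create and register the Haswell MFC context: size the GPE state (binding
 * table, interface descriptors, CURBE, VFE), load the AVC intra/inter
 * batchbuffer kernels, and hook the gen75-specific state-emission callbacks
 * into the common encoder context.
 */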
1818 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1819 {
1820     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1821
1822     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1823
1824     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1825     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1826
1827     mfc_context->gpe_context.curbe.length = 32 * 4;
1828
1829     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1830     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1831     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1832     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1833     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1834
1835     i965_gpe_load_kernels(ctx,
1836                           &mfc_context->gpe_context,
1837                           gen75_mfc_kernels,
1838                           NUM_MFC_KERNEL);
1839
1840     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
1841     mfc_context->set_surface_state = gen75_mfc_surface_state;
1842     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
1843     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
1844     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
1845     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
1846     mfc_context->insert_object = gen75_mfc_avc_insert_object;
1847     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1848
1849     encoder_context->mfc_context = mfc_context;
1850     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
1851     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
1852     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
1853
1854     return True;
1855 }