Internal flag for the coded buffer
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
#define MFC_SOFTWARE_HASWELL    1

/* Lowest device revision number treated as a "B+" stepping by the check below. */
#define B0_STEP_REV             2
/*
 * True when the driver data pointed to by i965 reports a revision of at
 * least B0_STEP_REV.  The argument is parenthesized so that any pointer
 * expression (not just a plain identifier) expands correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
/*
 * Binary for the "AVC intra" batchbuffer kernel.  The rows (four 32-bit
 * words each) are supplied entirely by the included .g7b file.
 */
static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
};

/*
 * Binary for the "AVC inter" batchbuffer kernel, laid out the same way and
 * likewise taken from its .g7b file.
 */
static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
};
58
/*
 * Table of the two batchbuffer kernels defined above.  Each entry is a
 * positional initializer giving, in order: a name string, a kernel id
 * constant, the binary array, the size of that array in bytes, and NULL.
 * NOTE(review): the last slot is presumably the kernel's buffer object,
 * filled in later -- confirm against struct i965_kernel.
 */
static struct i965_kernel gen75_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen75_mfc_batchbuffer_avc_intra,
        sizeof(gen75_mfc_batchbuffer_avc_intra),
        NULL
    },

    {
        "MFC AVC INTER BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTER,
        gen75_mfc_batchbuffer_avc_inter,
        sizeof(gen75_mfc_batchbuffer_avc_inter),
        NULL
    },
};
76
/*
 * Bit masks and values for inter-prediction handling.
 * NOTE(review): none of these are referenced in the part of the file
 * reviewed here; the groupings below are inferred from the names only --
 * confirm against the code that consumes them.
 */
/* Presumably the mask for, and the possible values of, an inter partition mode field. */
#define         INTER_MODE_MASK         0x03
#define         INTER_8X8               0x03
#define         INTER_16X8              0x01
#define         INTER_8X16              0x02
/* Presumably selects a sub-macroblock shape byte (bits 8..15). */
#define         SUBMB_SHAPE_MASK        0x00FF00

/* Values positioned at bit 20; presumably motion-vector count encodings. */
#define         INTER_MV8               (4 << 20)
#define         INTER_MV32              (6 << 20)
85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                           int standard_select,
90                           struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94
95     assert(standard_select == MFX_FORMAT_MPEG2 ||
96            standard_select == MFX_FORMAT_AVC);
97
98     BEGIN_BCS_BATCH(batch, 5);
99
100     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
101     OUT_BCS_BATCH(batch,
102                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
103                   (MFD_MODE_VLD << 15) | /* VLD mode */
104                   (0 << 10) | /* Stream-Out Enable */
105                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
106                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
107                   (0 << 5)  | /* not in stitch mode */
108                   (1 << 4)  | /* encoding mode */
109                   (standard_select << 0));  /* standard select: avc or mpeg2 */
110     OUT_BCS_BATCH(batch,
111                   (0 << 7)  | /* expand NOA bus flag */
112                   (0 << 6)  | /* disable slice-level clock gating */
113                   (0 << 5)  | /* disable clock gating for NOA */
114                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
115                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
116                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
117                   (0 << 1)  |
118                   (0 << 0));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch, 0);
121
122     ADVANCE_BCS_BATCH(batch);
123 }
124
125 static void
126 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
127 {
128     struct intel_batchbuffer *batch = encoder_context->base.batch;
129     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
130
131     BEGIN_BCS_BATCH(batch, 6);
132
133     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
134     OUT_BCS_BATCH(batch, 0);
135     OUT_BCS_BATCH(batch,
136                   ((mfc_context->surface_state.height - 1) << 18) |
137                   ((mfc_context->surface_state.width - 1) << 4));
138     OUT_BCS_BATCH(batch,
139                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
140                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
141                   (0 << 22) | /* surface object control state, FIXME??? */
142                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
143                   (0 << 2)  | /* must be 0 for interleave U/V */
144                   (1 << 1)  | /* must be tiled */
145                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
146     OUT_BCS_BATCH(batch,
147                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
148                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
149     OUT_BCS_BATCH(batch, 0);
150
151     ADVANCE_BCS_BATCH(batch);
152 }
153
154 static void
155 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
156                                 struct intel_encoder_context *encoder_context)
157 {
158     struct intel_batchbuffer *batch = encoder_context->base.batch;
159     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
160     struct gen6_vme_context *vme_context = encoder_context->vme_context;
161
162     BEGIN_BCS_BATCH(batch, 26);
163
164     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
165         /* the DW1-3 is for the MFX indirect bistream offset */
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168     OUT_BCS_BATCH(batch, 0);
169         /* the DW4-5 is the MFX upper bound */
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172
173     /* the DW6-10 is for MFX Indirect MV Object Base Address */
174     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
178     OUT_BCS_BATCH(batch, 0);
179
180      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185     OUT_BCS_BATCH(batch, 0);
186
187      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192     OUT_BCS_BATCH(batch, 0);
193
194     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
195     OUT_BCS_RELOC(batch,
196                   mfc_context->mfc_indirect_pak_bse_object.bo,
197                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198                   0);
199     OUT_BCS_BATCH(batch, 0);
200     OUT_BCS_BATCH(batch, 0);
201         
202     OUT_BCS_RELOC(batch,
203                   mfc_context->mfc_indirect_pak_bse_object.bo,
204                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
205                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
206     OUT_BCS_BATCH(batch, 0);
207
208     ADVANCE_BCS_BATCH(batch);
209 }
210
211 static void
212 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
213 {
214     struct intel_batchbuffer *batch = encoder_context->base.batch;
215     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
216     struct gen6_vme_context *vme_context = encoder_context->vme_context;
217     struct i965_driver_data *i965 = i965_driver_data(ctx);
218
219     if (IS_STEPPING_BPLUS(i965)) {
220         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
221         return;
222     }
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262     OUT_BCS_BATCH(batch,
263                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
264     OUT_BCS_BATCH(batch, 
265                   ((height_in_mbs - 1) << 16) | 
266                   ((width_in_mbs - 1) << 0));
267     OUT_BCS_BATCH(batch, 
268                   (0 << 24) |   /* Second Chroma QP Offset */
269                   (0 << 16) |   /* Chroma QP Offset */
270                   (0 << 14) |   /* Max-bit conformance Intra flag */
271                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
272                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
273                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
274                   (0 << 8)  |   /* FIXME: Image Structure */
275                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
276     OUT_BCS_BATCH(batch,
277                   (0 << 16) |   /* Mininum Frame size */
278                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
279                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
280                   (0 << 13) |   /* CABAC 0 word insertion test enable */
281                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
282                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
283                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
284                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
285                   (0 << 6)  |   /* Only valid for VLD decoding mode */
286                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
287                   (0 << 4)  |   /* Direct 8x8 inference flag */
288                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
289                   (1 << 2)  |   /* Frame MB only flag */
290                   (0 << 1)  |   /* MBAFF mode is in active */
291                   (0 << 0));    /* Field picture flag */
292     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
293     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
294                   (0xBB8 << 16) |       /* InterMbMaxSz */
295                   (0xEE8) );            /* IntraMbMaxSz */
296     OUT_BCS_BATCH(batch, 0);            /* Reserved */
297     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
298     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
299     OUT_BCS_BATCH(batch, 0x8C000000);
300     OUT_BCS_BATCH(batch, 0x00010000);
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305
306     ADVANCE_BCS_BATCH(batch);
307 }
308
309 static void
310 gen75_mfc_qm_state(VADriverContextP ctx,
311                   int qm_type,
312                   unsigned int *qm,
313                   int qm_length,
314                   struct intel_encoder_context *encoder_context)
315 {
316     struct intel_batchbuffer *batch = encoder_context->base.batch;
317     unsigned int qm_buffer[16];
318
319     assert(qm_length <= 16);
320     assert(sizeof(*qm) == 4);
321     memcpy(qm_buffer, qm, qm_length * 4);
322
323     BEGIN_BCS_BATCH(batch, 18);
324     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
325     OUT_BCS_BATCH(batch, qm_type << 0);
326     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
327     ADVANCE_BCS_BATCH(batch);
328 }
329
330 static void
331 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
332 {
333     unsigned int qm[16] = {
334         0x10101010, 0x10101010, 0x10101010, 0x10101010,
335         0x10101010, 0x10101010, 0x10101010, 0x10101010,
336         0x10101010, 0x10101010, 0x10101010, 0x10101010,
337         0x10101010, 0x10101010, 0x10101010, 0x10101010
338     };
339
340     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
341     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
342     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
343     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
344 }
345
346 static void
347 gen75_mfc_fqm_state(VADriverContextP ctx,
348                    int fqm_type,
349                    unsigned int *fqm,
350                    int fqm_length,
351                    struct intel_encoder_context *encoder_context)
352 {
353     struct intel_batchbuffer *batch = encoder_context->base.batch;
354     unsigned int fqm_buffer[32];
355
356     assert(fqm_length <= 32);
357     assert(sizeof(*fqm) == 4);
358     memcpy(fqm_buffer, fqm, fqm_length * 4);
359
360     BEGIN_BCS_BATCH(batch, 34);
361     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
362     OUT_BCS_BATCH(batch, fqm_type << 0);
363     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
364     ADVANCE_BCS_BATCH(batch);
365 }
366
367 static void
368 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
369 {
370     unsigned int qm[32] = {
371         0x10001000, 0x10001000, 0x10001000, 0x10001000,
372         0x10001000, 0x10001000, 0x10001000, 0x10001000,
373         0x10001000, 0x10001000, 0x10001000, 0x10001000,
374         0x10001000, 0x10001000, 0x10001000, 0x10001000,
375         0x10001000, 0x10001000, 0x10001000, 0x10001000,
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000
379     };
380
381     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
382     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
383     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
384     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
385 }
386
387 static void
388 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
389                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
390                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
391                            struct intel_batchbuffer *batch)
392 {
393     if (batch == NULL)
394         batch = encoder_context->base.batch;
395
396     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
397
398     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
399     OUT_BCS_BATCH(batch,
400                   (0 << 16) |   /* always start at offset 0 */
401                   (data_bits_in_last_dw << 8) |
402                   (skip_emul_byte_count << 4) |
403                   (!!emulation_flag << 3) |
404                   ((!!is_last_header) << 2) |
405                   ((!!is_end_of_slice) << 1) |
406                   (0 << 0));    /* FIXME: ??? */
407     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
408
409     ADVANCE_BCS_BATCH(batch);
410 }
411
412
413 static void gen75_mfc_init(VADriverContextP ctx,
414                         struct encode_state *encode_state,
415                         struct intel_encoder_context *encoder_context)
416 {
417     struct i965_driver_data *i965 = i965_driver_data(ctx);
418     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
419     dri_bo *bo;
420     int i;
421     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
422     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
423     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
424
425     /*Encode common setup for MFC*/
426     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
427     mfc_context->post_deblocking_output.bo = NULL;
428
429     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
430     mfc_context->pre_deblocking_output.bo = NULL;
431
432     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
433     mfc_context->uncompressed_picture_source.bo = NULL;
434
435     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
436     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
437
438     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
439         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
440         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
441         mfc_context->direct_mv_buffers[i].bo = NULL;
442     }
443
444     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
445         if (mfc_context->reference_surfaces[i].bo != NULL)
446             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
447         mfc_context->reference_surfaces[i].bo = NULL;  
448     }
449
450     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
451     bo = dri_bo_alloc(i965->intel.bufmgr,
452                       "Buffer",
453                       width_in_mbs * 64,
454                       64);
455     assert(bo);
456     mfc_context->intra_row_store_scratch_buffer.bo = bo;
457
458     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
459     bo = dri_bo_alloc(i965->intel.bufmgr,
460                       "Buffer",
461                       width_in_mbs * height_in_mbs * 16,
462                       64);
463     assert(bo);
464     mfc_context->macroblock_status_buffer.bo = bo;
465
466     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
467     bo = dri_bo_alloc(i965->intel.bufmgr,
468                       "Buffer",
469                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
470                       64);
471     assert(bo);
472     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
473
474     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
475     bo = dri_bo_alloc(i965->intel.bufmgr,
476                       "Buffer",
477                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
478                       0x1000);
479     assert(bo);
480     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
481
482     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
483     mfc_context->mfc_batchbuffer_surface.bo = NULL;
484
485     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
486     mfc_context->aux_batchbuffer_surface.bo = NULL;
487
488     if (mfc_context->aux_batchbuffer)
489         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
490
491     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
492     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
493     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
494     mfc_context->aux_batchbuffer_surface.pitch = 16;
495     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
496     mfc_context->aux_batchbuffer_surface.size_block = 16;
497
498     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
499 }
500
501 static void
502 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
503                                 struct intel_encoder_context *encoder_context)
504 {
505     struct intel_batchbuffer *batch = encoder_context->base.batch;
506     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
507     int i;
508
509     BEGIN_BCS_BATCH(batch, 61);
510
511     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
512
513     /* the DW1-3 is for pre_deblocking */
514     if (mfc_context->pre_deblocking_output.bo)
515         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
516                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
517                       0);
518     else
519         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
520
521         OUT_BCS_BATCH(batch, 0);
522         OUT_BCS_BATCH(batch, 0);
523      /* the DW4-6 is for the post_deblocking */
524
525     if (mfc_context->post_deblocking_output.bo)
526         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
527                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
528                       0);                                                                                       /* post output addr  */ 
529     else
530         OUT_BCS_BATCH(batch, 0);
531         OUT_BCS_BATCH(batch, 0);
532         OUT_BCS_BATCH(batch, 0);
533
534      /* the DW7-9 is for the uncompressed_picture */
535     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
536                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
537                   0); /* uncompressed data */
538
539         OUT_BCS_BATCH(batch, 0);
540         OUT_BCS_BATCH(batch, 0);
541
542      /* the DW10-12 is for the mb status */
543     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
544                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
545                   0); /* StreamOut data*/
546         OUT_BCS_BATCH(batch, 0);
547         OUT_BCS_BATCH(batch, 0);
548
549      /* the DW13-15 is for the intra_row_store_scratch */
550     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
551                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
552                   0);   
553         OUT_BCS_BATCH(batch, 0);
554         OUT_BCS_BATCH(batch, 0);
555
556      /* the DW16-18 is for the deblocking filter */
557     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
558                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
559                   0);
560         OUT_BCS_BATCH(batch, 0);
561         OUT_BCS_BATCH(batch, 0);
562
563     /* the DW 19-50 is for Reference pictures*/
564     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
565         if ( mfc_context->reference_surfaces[i].bo != NULL) {
566             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
567                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
568                           0);                   
569         } else {
570             OUT_BCS_BATCH(batch, 0);
571         }
572         OUT_BCS_BATCH(batch, 0);
573     }
574         OUT_BCS_BATCH(batch, 0);
575
576         /* The DW 52-54 is for the MB status buffer */
577     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
578                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
579                   0);                                                                                   /* Macroblock status buffer*/
580         
581         OUT_BCS_BATCH(batch, 0);
582         OUT_BCS_BATCH(batch, 0);
583
584         /* the DW 55-57 is the ILDB buffer */
585         OUT_BCS_BATCH(batch, 0);
586         OUT_BCS_BATCH(batch, 0);
587         OUT_BCS_BATCH(batch, 0);
588
589         /* the DW 58-60 is the second ILDB buffer */
590         OUT_BCS_BATCH(batch, 0);
591         OUT_BCS_BATCH(batch, 0);
592         OUT_BCS_BATCH(batch, 0);
593     ADVANCE_BCS_BATCH(batch);
594 }
595
596 static void
597 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
598 {
599     struct intel_batchbuffer *batch = encoder_context->base.batch;
600     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
601     struct i965_driver_data *i965 = i965_driver_data(ctx);
602     int i;
603
604     if (IS_STEPPING_BPLUS(i965)) {
605         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
606         return;
607     }
608
609     BEGIN_BCS_BATCH(batch, 25);
610
611     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
612
613     if (mfc_context->pre_deblocking_output.bo)
614         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
615                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
616                       0);
617     else
618         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
619
620     if (mfc_context->post_deblocking_output.bo)
621         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
622                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
623                       0);                                                                                       /* post output addr  */ 
624     else
625         OUT_BCS_BATCH(batch, 0);
626
627     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
628                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
629                   0);                                                                                   /* uncompressed data */
630     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
631                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
632                   0);                                                                                   /* StreamOut data*/
633     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
634                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
635                   0);   
636     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
637                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
638                   0);
639     /* 7..22 Reference pictures*/
640     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
641         if ( mfc_context->reference_surfaces[i].bo != NULL) {
642             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
643                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
644                           0);                   
645         } else {
646             OUT_BCS_BATCH(batch, 0);
647         }
648     }
649     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* Macroblock status buffer*/
652
653         OUT_BCS_BATCH(batch, 0);
654
655     ADVANCE_BCS_BATCH(batch);
656 }
657
658 static void
659 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
660                                 struct intel_encoder_context *encoder_context)
661 {
662     struct intel_batchbuffer *batch = encoder_context->base.batch;
663     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
664
665     int i;
666
667     BEGIN_BCS_BATCH(batch, 71);
668
669     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
670
671     /* Reference frames and Current frames */
672     /* the DW1-32 is for the direct MV for reference */
673     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
674         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
675             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
676                           I915_GEM_DOMAIN_INSTRUCTION, 0,
677                           0);
678             OUT_BCS_BATCH(batch, 0);
679         } else {
680             OUT_BCS_BATCH(batch, 0);
681             OUT_BCS_BATCH(batch, 0);
682         }
683     }
684         OUT_BCS_BATCH(batch, 0);
685
686         /* the DW34-36 is the MV for the current reference */
687         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
688                           I915_GEM_DOMAIN_INSTRUCTION, 0,
689                           0);
690
691         OUT_BCS_BATCH(batch, 0);
692         OUT_BCS_BATCH(batch, 0);
693
694     /* POL list */
695     for(i = 0; i < 32; i++) {
696         OUT_BCS_BATCH(batch, i/2);
697     }
698     OUT_BCS_BATCH(batch, 0);
699     OUT_BCS_BATCH(batch, 0);
700
701     ADVANCE_BCS_BATCH(batch);
702 }
703
704 static void
705 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
706 {
707     struct intel_batchbuffer *batch = encoder_context->base.batch;
708     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
709     struct i965_driver_data *i965 = i965_driver_data(ctx);
710     int i;
711
712     if (IS_STEPPING_BPLUS(i965)) {
713         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
714         return;
715     }
716
717     BEGIN_BCS_BATCH(batch, 69);
718
719     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
720
721     /* Reference frames and Current frames */
722     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
723         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
724             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
725                           I915_GEM_DOMAIN_INSTRUCTION, 0,
726                           0);
727         } else {
728             OUT_BCS_BATCH(batch, 0);
729         }
730     }
731
732     /* POL list */
733     for(i = 0; i < 32; i++) {
734         OUT_BCS_BATCH(batch, i/2);
735     }
736     OUT_BCS_BATCH(batch, 0);
737     OUT_BCS_BATCH(batch, 0);
738
739     ADVANCE_BCS_BATCH(batch);
740 }
741
742 static void
743 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
744 {
745     struct intel_batchbuffer *batch = encoder_context->base.batch;
746     int i;
747
748     BEGIN_BCS_BATCH(batch, 10);
749     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
750     OUT_BCS_BATCH(batch, 0);                  //Select L0
751     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
752     for(i = 0; i < 7; i++) {
753         OUT_BCS_BATCH(batch, 0x80808080);
754     }   
755     ADVANCE_BCS_BATCH(batch);
756
757     BEGIN_BCS_BATCH(batch, 10);
758     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
759     OUT_BCS_BATCH(batch, 1);                  //Select L1
760     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
761     for(i = 0; i < 7; i++) {
762         OUT_BCS_BATCH(batch, 0x80808080);
763     }   
764     ADVANCE_BCS_BATCH(batch);
765 }
766
767
768 static void
769 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
770                                 struct intel_encoder_context *encoder_context)
771 {
772     struct intel_batchbuffer *batch = encoder_context->base.batch;
773     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
774
775     BEGIN_BCS_BATCH(batch, 10);
776
777     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
778     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
779                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
780                   0);
781     OUT_BCS_BATCH(batch, 0);
782     OUT_BCS_BATCH(batch, 0);
783         
784         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787     OUT_BCS_BATCH(batch, 0);
788
789         /* the DW7-9 is for Bitplane Read Buffer Base Address */
790     OUT_BCS_BATCH(batch, 0);
791     OUT_BCS_BATCH(batch, 0);
792     OUT_BCS_BATCH(batch, 0);
793
794     ADVANCE_BCS_BATCH(batch);
795 }
796
797 static void
798 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
799 {
800     struct intel_batchbuffer *batch = encoder_context->base.batch;
801     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
802     struct i965_driver_data *i965 = i965_driver_data(ctx);
803
804     if (IS_STEPPING_BPLUS(i965)) {
805         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
806         return;
807     }
808
809     BEGIN_BCS_BATCH(batch, 4);
810
811     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
812     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
813                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
814                   0);
815     OUT_BCS_BATCH(batch, 0);
816     OUT_BCS_BATCH(batch, 0);
817
818     ADVANCE_BCS_BATCH(batch);
819 }
820
821
822 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
823                                       struct encode_state *encode_state,
824                                       struct intel_encoder_context *encoder_context)
825 {
826     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
827
828     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
829     mfc_context->set_surface_state(ctx, encoder_context);
830     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
831     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
832     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
833     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
834     mfc_context->avc_qm_state(ctx, encoder_context);
835     mfc_context->avc_fqm_state(ctx, encoder_context);
836     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
837     gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
838 }
839
840
841 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
842                              struct encode_state *encode_state,
843                              struct intel_encoder_context *encoder_context)
844 {
845     struct intel_batchbuffer *batch = encoder_context->base.batch;
846
847     intel_batchbuffer_flush(batch);             //run the pipeline
848
849     return VA_STATUS_SUCCESS;
850 }
851
852
853 static VAStatus
854 gen75_mfc_stop(VADriverContextP ctx, 
855               struct encode_state *encode_state,
856               struct intel_encoder_context *encoder_context,
857               int *encoded_bits_size)
858 {
859     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
860     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
861     VACodedBufferSegment *coded_buffer_segment;
862     
863     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
864     assert(vaStatus == VA_STATUS_SUCCESS);
865     *encoded_bits_size = coded_buffer_segment->size * 8;
866     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
867
868     return VA_STATUS_SUCCESS;
869 }
870
871
872 static void
873 gen75_mfc_avc_slice_state(VADriverContextP ctx,
874                          VAEncPictureParameterBufferH264 *pic_param,
875                          VAEncSliceParameterBufferH264 *slice_param,
876                          struct encode_state *encode_state,
877                          struct intel_encoder_context *encoder_context,
878                          int rate_control_enable,
879                          int qp,
880                          struct intel_batchbuffer *batch)
881 {
882     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
883     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
884     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
885     int beginmb = slice_param->macroblock_address;
886     int endmb = beginmb + slice_param->num_macroblocks;
887     int beginx = beginmb % width_in_mbs;
888     int beginy = beginmb / width_in_mbs;
889     int nextx =  endmb % width_in_mbs;
890     int nexty = endmb / width_in_mbs;
891     int slice_type = slice_param->slice_type;
892     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
893     int bit_rate_control_target, maxQpN, maxQpP;
894     unsigned char correct[6], grow, shrink;
895     int i;
896     int weighted_pred_idc = 0;
897     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
898     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
899
900     if (batch == NULL)
901         batch = encoder_context->base.batch;
902
903     bit_rate_control_target = slice_type;
904     if (slice_type == SLICE_TYPE_SP)
905         bit_rate_control_target = SLICE_TYPE_P;
906     else if (slice_type == SLICE_TYPE_SI)
907         bit_rate_control_target = SLICE_TYPE_I;
908
909     if (slice_type == SLICE_TYPE_P) {
910         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
911     } else if (slice_type == SLICE_TYPE_B) {
912         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
913
914         if (weighted_pred_idc == 2) {
915             /* 8.4.3 - Derivation process for prediction weights (8-279) */
916             luma_log2_weight_denom = 5;
917             chroma_log2_weight_denom = 5;
918         }
919     }
920
921     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
922     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
923
924     for (i = 0; i < 6; i++)
925         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
926
927     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
928         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
929     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
930         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
931
932     BEGIN_BCS_BATCH(batch, 11);;
933
934     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
935     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
936
937     if (slice_type == SLICE_TYPE_I) {
938         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
939     } else {
940         OUT_BCS_BATCH(batch,
941                       (1 << 16) |                       /*1 reference frame*/
942                       (chroma_log2_weight_denom << 8) |
943                       (luma_log2_weight_denom << 0));
944     }
945
946     OUT_BCS_BATCH(batch, 
947                   (weighted_pred_idc << 30) |
948                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
949                   (slice_param->disable_deblocking_filter_idc << 27) |
950                   (slice_param->cabac_init_idc << 24) |
951                   (qp<<16) |                    /*Slice Quantization Parameter*/
952                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
953                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
954     OUT_BCS_BATCH(batch,
955                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
956                   (beginx << 16) |
957                   slice_param->macroblock_address );
958     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
959     OUT_BCS_BATCH(batch, 
960                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
961                   (1 << 30) |           /*ResetRateControlCounter*/
962                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
963                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
964                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
965                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
966                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
967                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
968                   (last_slice << 19) |     /*IsLastSlice*/
969                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
970                   (1 << 17) |       /*HeaderPresentFlag*/       
971                   (1 << 16) |       /*SliceData PresentFlag*/
972                   (1 << 15) |       /*TailPresentFlag*/
973                   (1 << 13) |       /*RBSP NAL TYPE*/   
974                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
975     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
976     OUT_BCS_BATCH(batch,
977                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
978                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
979                   (shrink << 8)  |
980                   (grow << 0));   
981     OUT_BCS_BATCH(batch,
982                   (correct[5] << 20) |
983                   (correct[4] << 16) |
984                   (correct[3] << 12) |
985                   (correct[2] << 8) |
986                   (correct[1] << 4) |
987                   (correct[0] << 0));
988     OUT_BCS_BATCH(batch, 0);
989
990     ADVANCE_BCS_BATCH(batch);
991 }
992
993
994 #ifdef MFC_SOFTWARE_HASWELL
995
996 static int
997 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
998                                 int qp,unsigned int *msg,
999                               struct intel_encoder_context *encoder_context,
1000                               unsigned char target_mb_size, unsigned char max_mb_size,
1001                               struct intel_batchbuffer *batch)
1002 {
1003     int len_in_dwords = 12;
1004     unsigned int intra_msg;
1005 #define         INTRA_MSG_FLAG          (1 << 13)
1006 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1007     if (batch == NULL)
1008         batch = encoder_context->base.batch;
1009
1010     BEGIN_BCS_BATCH(batch, len_in_dwords);
1011
1012     intra_msg = msg[0] & 0xC0FF;
1013     intra_msg |= INTRA_MSG_FLAG;
1014     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1015     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1016     OUT_BCS_BATCH(batch, 0);
1017     OUT_BCS_BATCH(batch, 0);
1018     OUT_BCS_BATCH(batch, 
1019                   (0 << 24) |           /* PackedMvNum, Debug*/
1020                   (0 << 20) |           /* No motion vector */
1021                   (1 << 19) |           /* CbpDcY */
1022                   (1 << 18) |           /* CbpDcU */
1023                   (1 << 17) |           /* CbpDcV */
1024                   intra_msg);
1025
1026     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1027     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1028     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1029
1030     /*Stuff for Intra MB*/
1031     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1032     OUT_BCS_BATCH(batch, msg[2]);       
1033     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1034     
1035     /*MaxSizeInWord and TargetSzieInWord*/
1036     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1037                   (target_mb_size << 16) );
1038
1039     OUT_BCS_BATCH(batch, 0);
1040
1041     ADVANCE_BCS_BATCH(batch);
1042
1043     return len_in_dwords;
1044 }
1045
1046 static int
1047 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1048                               unsigned int *msg, unsigned int offset,
1049                               struct intel_encoder_context *encoder_context,
1050                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1051                               struct intel_batchbuffer *batch)
1052 {
1053     int len_in_dwords = 12;
1054         unsigned int inter_msg = 0;
1055     if (batch == NULL)
1056         batch = encoder_context->base.batch;
1057     {
1058 #define MSG_MV_OFFSET   4
1059         unsigned int *mv_ptr;
1060         mv_ptr = msg + MSG_MV_OFFSET;
1061         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1062          * to convert them to be compatible with the format of AVC_PAK
1063          * command.
1064          */
1065         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1066                 /* MV[0] and MV[2] are replicated */
1067                 mv_ptr[4] = mv_ptr[0];
1068                 mv_ptr[5] = mv_ptr[1];
1069                 mv_ptr[2] = mv_ptr[8];
1070                 mv_ptr[3] = mv_ptr[9];
1071                 mv_ptr[6] = mv_ptr[8]; 
1072                 mv_ptr[7] = mv_ptr[9]; 
1073         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1074                 /* MV[0] and MV[1] are replicated */
1075                 mv_ptr[2] = mv_ptr[0];  
1076                 mv_ptr[3] = mv_ptr[1];
1077                 mv_ptr[4] = mv_ptr[16]; 
1078                 mv_ptr[5] = mv_ptr[17]; 
1079                 mv_ptr[6] = mv_ptr[24];
1080                 mv_ptr[7] = mv_ptr[25];
1081         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1082                         !(msg[1] & SUBMB_SHAPE_MASK)) {
1083                 /* Don't touch MV[0] or MV[1] */
1084                 mv_ptr[2] = mv_ptr[8];
1085                 mv_ptr[3] = mv_ptr[9];
1086                 mv_ptr[4] = mv_ptr[16];
1087                 mv_ptr[5] = mv_ptr[17];
1088                 mv_ptr[6] = mv_ptr[24];
1089                 mv_ptr[7] = mv_ptr[25];
1090         }
1091     }
1092
1093     BEGIN_BCS_BATCH(batch, len_in_dwords);
1094
1095     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1096
1097         inter_msg = 32;
1098         /* MV quantity */
1099         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1100                 if (msg[1] & SUBMB_SHAPE_MASK)
1101                         inter_msg = 128;
1102         }
1103     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1104     OUT_BCS_BATCH(batch, offset);
1105         inter_msg = msg[0] & (0x1F00FFFF);
1106         inter_msg |= INTER_MV8;
1107         inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1108         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1109                         (msg[1] & SUBMB_SHAPE_MASK)) {
1110                 inter_msg |= INTER_MV32;
1111         }
1112
1113     OUT_BCS_BATCH(batch, inter_msg);
1114
1115     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1116     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1117 #if 0 
1118     if ( slice_type == SLICE_TYPE_B) {
1119         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1120     } else {
1121         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1122     }
1123 #else
1124     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1125 #endif
1126
1127         inter_msg = msg[1] >> 8;
1128     /*Stuff for Inter MB*/
1129     OUT_BCS_BATCH(batch, inter_msg);        
1130     OUT_BCS_BATCH(batch, 0x0);    
1131     OUT_BCS_BATCH(batch, 0x0);        
1132
1133     /*MaxSizeInWord and TargetSzieInWord*/
1134     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1135                   (target_mb_size << 16) );
1136
1137     OUT_BCS_BATCH(batch, 0x0);    
1138
1139     ADVANCE_BCS_BATCH(batch);
1140
1141     return len_in_dwords;
1142 }
1143
1144 #define         INTRA_RDO_OFFSET        4
1145 #define         INTER_RDO_OFFSET        54
1146 #define         INTER_MSG_OFFSET        52
1147 #define         INTER_MV_OFFSET         224
1148 #define         RDO_MASK                0xFFFF
1149
1150 static void 
1151 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1152                                        struct encode_state *encode_state,
1153                                        struct intel_encoder_context *encoder_context,
1154                                        int slice_index,
1155                                        struct intel_batchbuffer *slice_batch)
1156 {
1157     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1158     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1159     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1160     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1161     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1162     unsigned int *msg = NULL, offset = 0;
1163     unsigned char *msg_ptr = NULL;
1164     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1165     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1166     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1167     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1168     int i,x,y;
1169     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1170     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1171     unsigned char *slice_header = NULL;
1172     int slice_header_length_in_bits = 0;
1173     unsigned int tail_data[] = { 0x0, 0x0 };
1174     int slice_type = pSliceParameter->slice_type;
1175
1176
1177     if (rate_control_mode == VA_RC_CBR) {
1178         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1179         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1180     }
1181
1182     /* only support for 8-bit pixel bit-depth */
1183     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1184     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1185     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1186     assert(qp >= 0 && qp < 52);
1187
1188     gen75_mfc_avc_slice_state(ctx, 
1189                              pPicParameter,
1190                              pSliceParameter,
1191                              encode_state, encoder_context,
1192                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1193
1194     if ( slice_index == 0) 
1195         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1196
1197     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1198
1199     // slice hander
1200     mfc_context->insert_object(ctx, encoder_context,
1201                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1202                                5,  /* first 5 bytes are start code + nal unit type */
1203                                1, 0, 1, slice_batch);
1204
1205     dri_bo_map(vme_context->vme_output.bo , 1);
1206     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1207
1208     if (is_intra) {
1209         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1210     } else {
1211         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1212     }
1213    
1214     for (i = pSliceParameter->macroblock_address; 
1215          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1216         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1217         x = i % width_in_mbs;
1218         y = i / width_in_mbs;
1219         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1220
1221         if (is_intra) {
1222             assert(msg);
1223             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1224         } else {
1225             int inter_rdo, intra_rdo;
1226             inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
1227             intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
1228             offset = i * vme_context->vme_output.size_block + INTER_MV_OFFSET;
1229             if (intra_rdo < inter_rdo) { 
1230                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1231             } else {
1232                 msg += INTER_MSG_OFFSET;
1233                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1234             }
1235         }
1236     }
1237    
1238     dri_bo_unmap(vme_context->vme_output.bo);
1239
1240     if ( last_slice ) {    
1241         mfc_context->insert_object(ctx, encoder_context,
1242                                    tail_data, 2, 8,
1243                                    2, 1, 1, 0, slice_batch);
1244     } else {
1245         mfc_context->insert_object(ctx, encoder_context,
1246                                    tail_data, 1, 8,
1247                                    1, 1, 1, 0, slice_batch);
1248     }
1249
1250     free(slice_header);
1251
1252 }
1253
1254 static dri_bo *
1255 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1256                                   struct encode_state *encode_state,
1257                                   struct intel_encoder_context *encoder_context)
1258 {
1259     struct i965_driver_data *i965 = i965_driver_data(ctx);
1260     struct intel_batchbuffer *batch;
1261     dri_bo *batch_bo;
1262     int i;
1263     int buffer_size;
1264     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1265     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1266     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1267
1268     buffer_size = width_in_mbs * height_in_mbs * 64;
1269     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1270     batch_bo = batch->buffer;
1271     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1272         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1273     }
1274
1275     intel_batchbuffer_align(batch, 8);
1276     
1277     BEGIN_BCS_BATCH(batch, 2);
1278     OUT_BCS_BATCH(batch, 0);
1279     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1280     ADVANCE_BCS_BATCH(batch);
1281
1282     dri_bo_reference(batch_bo);
1283     intel_batchbuffer_free(batch);
1284
1285     return batch_bo;
1286 }
1287
1288 #else
1289
1290 static void
1291 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1292                                     struct encode_state *encode_state,
1293                                     struct intel_encoder_context *encoder_context)
1294
1295 {
1296     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1297     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1298
1299     assert(vme_context->vme_output.bo);
1300     mfc_context->buffer_suface_setup(ctx,
1301                                      &mfc_context->gpe_context,
1302                                      &vme_context->vme_output,
1303                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1304                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1305     assert(mfc_context->aux_batchbuffer_surface.bo);
1306     mfc_context->buffer_suface_setup(ctx,
1307                                      &mfc_context->gpe_context,
1308                                      &mfc_context->aux_batchbuffer_surface,
1309                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1310                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1311 }
1312
1313 static void
1314 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1315                                      struct encode_state *encode_state,
1316                                      struct intel_encoder_context *encoder_context)
1317
1318 {
1319     struct i965_driver_data *i965 = i965_driver_data(ctx);
1320     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1321     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1322     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1323     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1324     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1325     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1326     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1327     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1328                                                            "MFC batchbuffer",
1329                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1330                                                            0x1000);
1331     mfc_context->buffer_suface_setup(ctx,
1332                                      &mfc_context->gpe_context,
1333                                      &mfc_context->mfc_batchbuffer_surface,
1334                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1335                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1336 }
1337
1338 static void
1339 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1340                                     struct encode_state *encode_state,
1341                                     struct intel_encoder_context *encoder_context)
1342 {
1343     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1344     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1345 }
1346
1347 static void
1348 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1349                                 struct encode_state *encode_state,
1350                                 struct intel_encoder_context *encoder_context)
1351 {
1352     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1353     struct gen6_interface_descriptor_data *desc;   
1354     int i;
1355     dri_bo *bo;
1356
1357     bo = mfc_context->gpe_context.idrt.bo;
1358     dri_bo_map(bo, 1);
1359     assert(bo->virtual);
1360     desc = bo->virtual;
1361
1362     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1363         struct i965_kernel *kernel;
1364
1365         kernel = &mfc_context->gpe_context.kernels[i];
1366         assert(sizeof(*desc) == 32);
1367
1368         /*Setup the descritor table*/
1369         memset(desc, 0, sizeof(*desc));
1370         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1371         desc->desc2.sampler_count = 0;
1372         desc->desc2.sampler_state_pointer = 0;
1373         desc->desc3.binding_table_entry_count = 2;
1374         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1375         desc->desc4.constant_urb_entry_read_offset = 0;
1376         desc->desc4.constant_urb_entry_read_length = 4;
1377                 
1378         /*kernel start*/
1379         dri_bo_emit_reloc(bo,   
1380                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1381                           0,
1382                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1383                           kernel->bo);
1384         desc++;
1385     }
1386
1387     dri_bo_unmap(bo);
1388 }
1389
1390 static void
1391 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1392                                     struct encode_state *encode_state,
1393                                     struct intel_encoder_context *encoder_context)
1394 {
1395     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1396     
1397     (void)mfc_context;
1398 }
1399
1400 static void
1401 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1402                                          int index,
1403                                          int head_offset,
1404                                          int batchbuffer_offset,
1405                                          int head_size,
1406                                          int tail_size,
1407                                          int number_mb_cmds,
1408                                          int first_object,
1409                                          int last_object,
1410                                          int last_slice,
1411                                          int mb_x,
1412                                          int mb_y,
1413                                          int width_in_mbs,
1414                                          int qp)
1415 {
1416     BEGIN_BATCH(batch, 12);
1417     
1418     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1419     OUT_BATCH(batch, index);
1420     OUT_BATCH(batch, 0);
1421     OUT_BATCH(batch, 0);
1422     OUT_BATCH(batch, 0);
1423     OUT_BATCH(batch, 0);
1424    
1425     /*inline data */
1426     OUT_BATCH(batch, head_offset);
1427     OUT_BATCH(batch, batchbuffer_offset);
1428     OUT_BATCH(batch, 
1429               head_size << 16 |
1430               tail_size);
1431     OUT_BATCH(batch,
1432               number_mb_cmds << 16 |
1433               first_object << 2 |
1434               last_object << 1 |
1435               last_slice);
1436     OUT_BATCH(batch,
1437               mb_y << 8 |
1438               mb_x);
1439     OUT_BATCH(batch,
1440               qp << 16 |
1441               width_in_mbs);
1442
1443     ADVANCE_BATCH(batch);
1444 }
1445
1446 static void
1447 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1448                                        struct intel_encoder_context *encoder_context,
1449                                        VAEncSliceParameterBufferH264 *slice_param,
1450                                        int head_offset,
1451                                        unsigned short head_size,
1452                                        unsigned short tail_size,
1453                                        int batchbuffer_offset,
1454                                        int qp,
1455                                        int last_slice)
1456 {
1457     struct intel_batchbuffer *batch = encoder_context->base.batch;
1458     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1459     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1460     int total_mbs = slice_param->num_macroblocks;
1461     int number_mb_cmds = 128;
1462     int starting_mb = 0;
1463     int last_object = 0;
1464     int first_object = 1;
1465     int i;
1466     int mb_x, mb_y;
1467     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1468
1469     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1470         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1471         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1472         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1473         assert(mb_x <= 255 && mb_y <= 255);
1474
1475         starting_mb += number_mb_cmds;
1476
1477         gen75_mfc_batchbuffer_emit_object_command(batch,
1478                                                  index,
1479                                                  head_offset,
1480                                                  batchbuffer_offset,
1481                                                  head_size,
1482                                                  tail_size,
1483                                                  number_mb_cmds,
1484                                                  first_object,
1485                                                  last_object,
1486                                                  last_slice,
1487                                                  mb_x,
1488                                                  mb_y,
1489                                                  width_in_mbs,
1490                                                  qp);
1491
1492         if (first_object) {
1493             head_offset += head_size;
1494             batchbuffer_offset += head_size;
1495         }
1496
1497         if (last_object) {
1498             head_offset += tail_size;
1499             batchbuffer_offset += tail_size;
1500         }
1501
1502         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1503
1504         first_object = 0;
1505     }
1506
1507     if (!last_object) {
1508         last_object = 1;
1509         number_mb_cmds = total_mbs % number_mb_cmds;
1510         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1511         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1512         assert(mb_x <= 255 && mb_y <= 255);
1513         starting_mb += number_mb_cmds;
1514
1515         gen75_mfc_batchbuffer_emit_object_command(batch,
1516                                                  index,
1517                                                  head_offset,
1518                                                  batchbuffer_offset,
1519                                                  head_size,
1520                                                  tail_size,
1521                                                  number_mb_cmds,
1522                                                  first_object,
1523                                                  last_object,
1524                                                  last_slice,
1525                                                  mb_x,
1526                                                  mb_y,
1527                                                  width_in_mbs,
1528                                                  qp);
1529     }
1530 }
1531                           
1532 /*
1533  * return size in Owords (16bytes)
1534  */         
1535 static int
1536 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1537                                struct encode_state *encode_state,
1538                                struct intel_encoder_context *encoder_context,
1539                                int slice_index,
1540                                int batchbuffer_offset)
1541 {
1542     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1543     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1544     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1545     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1546     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1547     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1548     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1549     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1550     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1551     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1552     unsigned char *slice_header = NULL;
1553     int slice_header_length_in_bits = 0;
1554     unsigned int tail_data[] = { 0x0, 0x0 };
1555     long head_offset;
1556     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1557     unsigned short head_size, tail_size;
1558     int slice_type = pSliceParameter->slice_type;
1559
1560     if (rate_control_mode == VA_RC_CBR) {
1561         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1562         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1563     }
1564
1565     /* only support for 8-bit pixel bit-depth */
1566     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1567     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1568     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1569     assert(qp >= 0 && qp < 52);
1570
1571     head_offset = old_used / 16;
1572     gen75_mfc_avc_slice_state(ctx,
1573                              pPicParameter,
1574                              pSliceParameter,
1575                              encode_state,
1576                              encoder_context,
1577                              (rate_control_mode == VA_RC_CBR),
1578                              qp,
1579                              slice_batch);
1580
1581     if (slice_index == 0)
1582         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1583
1584     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1585
1586     // slice hander
1587     mfc_context->insert_object(ctx,
1588                                encoder_context,
1589                                (unsigned int *)slice_header,
1590                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1591                                slice_header_length_in_bits & 0x1f,
1592                                5,  /* first 5 bytes are start code + nal unit type */
1593                                1,
1594                                0,
1595                                1,
1596                                slice_batch);
1597     free(slice_header);
1598
1599     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1600     used = intel_batchbuffer_used_size(slice_batch);
1601     head_size = (used - old_used) / 16;
1602     old_used = used;
1603
1604     /* tail */
1605     if (last_slice) {    
1606         mfc_context->insert_object(ctx,
1607                                    encoder_context,
1608                                    tail_data,
1609                                    2,
1610                                    8,
1611                                    2,
1612                                    1,
1613                                    1,
1614                                    0,
1615                                    slice_batch);
1616     } else {
1617         mfc_context->insert_object(ctx,
1618                                    encoder_context,
1619                                    tail_data,
1620                                    1,
1621                                    8,
1622                                    1,
1623                                    1,
1624                                    1,
1625                                    0,
1626                                    slice_batch);
1627     }
1628
1629     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1630     used = intel_batchbuffer_used_size(slice_batch);
1631     tail_size = (used - old_used) / 16;
1632
1633    
1634     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1635                                            encoder_context,
1636                                            pSliceParameter,
1637                                            head_offset,
1638                                            head_size,
1639                                            tail_size,
1640                                            batchbuffer_offset,
1641                                            qp,
1642                                            last_slice);
1643
1644     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1645 }
1646
1647 static void
1648 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1649                                   struct encode_state *encode_state,
1650                                   struct intel_encoder_context *encoder_context)
1651 {
1652     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1653     struct intel_batchbuffer *batch = encoder_context->base.batch;
1654     int i, size, offset = 0;
1655     intel_batchbuffer_start_atomic(batch, 0x4000); 
1656     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1657
1658     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1659         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1660         offset += size;
1661     }
1662
1663     intel_batchbuffer_end_atomic(batch);
1664     intel_batchbuffer_flush(batch);
1665 }
1666
1667 static void
1668 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1669                                struct encode_state *encode_state,
1670                                struct intel_encoder_context *encoder_context)
1671 {
1672     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1673     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1674     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1675     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1676 }
1677
1678 static dri_bo *
1679 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1680                                   struct encode_state *encode_state,
1681                                   struct intel_encoder_context *encoder_context)
1682 {
1683     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1684
1685     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1686     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1687
1688     return mfc_context->mfc_batchbuffer_surface.bo;
1689 }
1690
1691 #endif
1692
1693 static void
1694 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1695                                  struct encode_state *encode_state,
1696                                  struct intel_encoder_context *encoder_context)
1697 {
1698     struct intel_batchbuffer *batch = encoder_context->base.batch;
1699     dri_bo *slice_batch_bo;
1700
1701     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1702         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1703         assert(0);
1704         return; 
1705     }
1706
1707 #ifdef MFC_SOFTWARE_HASWELL
1708     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1709 #else
1710     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1711 #endif
1712
1713     // begin programing
1714     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1715     intel_batchbuffer_emit_mi_flush(batch);
1716     
1717     // picture level programing
1718     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1719
1720     BEGIN_BCS_BATCH(batch, 2);
1721     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1722     OUT_BCS_RELOC(batch,
1723                   slice_batch_bo,
1724                   I915_GEM_DOMAIN_COMMAND, 0, 
1725                   0);
1726     ADVANCE_BCS_BATCH(batch);
1727
1728     // end programing
1729     intel_batchbuffer_end_atomic(batch);
1730
1731     dri_bo_unreference(slice_batch_bo);
1732 }
1733
1734
1735 static VAStatus
1736 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1737                             struct encode_state *encode_state,
1738                             struct intel_encoder_context *encoder_context)
1739 {
1740     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1741     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1742     int current_frame_bits_size;
1743     int sts;
1744  
1745     for (;;) {
1746         gen75_mfc_init(ctx, encode_state, encoder_context);
1747         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1748         /*Programing bcs pipeline*/
1749         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1750         gen75_mfc_run(ctx, encode_state, encoder_context);
1751         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1752             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1753             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1754             if (sts == BRC_NO_HRD_VIOLATION) {
1755                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1756                 break;
1757             }
1758             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1759                 if (!mfc_context->hrd.violation_noted) {
1760                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1761                     mfc_context->hrd.violation_noted = 1;
1762                 }
1763                 return VA_STATUS_SUCCESS;
1764             }
1765         } else {
1766             break;
1767         }
1768     }
1769
1770     return VA_STATUS_SUCCESS;
1771 }
1772
1773 /*
1774  * MPEG-2
1775  */
1776
/*
 * Picture-type code written into the MPEG-2 picture state, indexed by the
 * picture_type field of the VA picture parameter buffer.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3     /* B */
};
1783
1784 static void
1785 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1786                           struct intel_encoder_context *encoder_context,
1787                           struct encode_state *encode_state)
1788 {
1789     struct intel_batchbuffer *batch = encoder_context->base.batch;
1790     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1791     VAEncPictureParameterBufferMPEG2 *pic_param;
1792     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1793     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1794
1795     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1796     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1797
1798     BEGIN_BCS_BATCH(batch, 13);
1799     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1800     OUT_BCS_BATCH(batch,
1801                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1802                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1803                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1804                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1805                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1806                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1807                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1808                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1809                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1810                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1811                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1812                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1813     OUT_BCS_BATCH(batch,
1814                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1815                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1816                   0);
1817     OUT_BCS_BATCH(batch,
1818                   1 << 31 |     /* slice concealment */
1819                   (height_in_mbs - 1) << 16 |
1820                   (width_in_mbs - 1));
1821     OUT_BCS_BATCH(batch, 0);
1822     OUT_BCS_BATCH(batch, 0);
1823     OUT_BCS_BATCH(batch,
1824                   0xFFF << 16 | /* InterMBMaxSize */
1825                   0xFFF << 0 |  /* IntraMBMaxSize */
1826                   0);
1827     OUT_BCS_BATCH(batch, 0);
1828     OUT_BCS_BATCH(batch, 0);
1829     OUT_BCS_BATCH(batch, 0);
1830     OUT_BCS_BATCH(batch, 0);
1831     OUT_BCS_BATCH(batch, 0);
1832     OUT_BCS_BATCH(batch, 0);
1833     ADVANCE_BCS_BATCH(batch);
1834 }
1835
1836 static void
1837 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1838 {
1839     unsigned int qm[16] = {
1840         0x10101010, 0x10101010, 0x10101010, 0x10101010,
1841         0x10101010, 0x10101010, 0x10101010, 0x10101010,
1842         0x10101010, 0x10101010, 0x10101010, 0x10101010,
1843         0x10101010, 0x10101010, 0x10101010, 0x10101010
1844     };
1845
1846     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, qm, 16, encoder_context);
1847     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, qm, 16,encoder_context);
1848 }
1849
1850 static void
1851 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1852 {
1853     unsigned int qm[32] = {
1854         0x10001000, 0x10001000, 0x10001000, 0x10001000,
1855         0x10001000, 0x10001000, 0x10001000, 0x10001000,
1856         0x10001000, 0x10001000, 0x10001000, 0x10001000,
1857         0x10001000, 0x10001000, 0x10001000, 0x10001000,
1858         0x10001000, 0x10001000, 0x10001000, 0x10001000,
1859         0x10001000, 0x10001000, 0x10001000, 0x10001000,
1860         0x10001000, 0x10001000, 0x10001000, 0x10001000,
1861         0x10001000, 0x10001000, 0x10001000, 0x10001000
1862     };
1863
1864     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, qm, 32, encoder_context);
1865     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, qm, 32, encoder_context);
1866 }
1867
1868 static void
1869 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1870                                  struct intel_encoder_context *encoder_context,
1871                                  int x, int y,
1872                                  int next_x, int next_y,
1873                                  int is_fisrt_slice_group,
1874                                  int is_last_slice_group,
1875                                  int intra_slice,
1876                                  int qp,
1877                                  struct intel_batchbuffer *batch)
1878 {
1879     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1880
1881     if (batch == NULL)
1882         batch = encoder_context->base.batch;
1883
1884     BEGIN_BCS_BATCH(batch, 8);
1885
1886     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1887     OUT_BCS_BATCH(batch,
1888                   0 << 31 |                             /* MbRateCtrlFlag */
1889                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1890                   1 << 17 |                             /* Insert Header before the first slice group data */
1891                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1892                   1 << 15 |                             /* TailPresentFlag: always 1 */
1893                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1894                   !!intra_slice << 13 |                 /* IntraSlice */
1895                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1896                   0);
1897     OUT_BCS_BATCH(batch,
1898                   next_y << 24 |
1899                   next_x << 16 |
1900                   y << 8 |
1901                   x << 0 |
1902                   0);
1903     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1904     /* bitstream pointer is only loaded once for the first slice of a frame when 
1905      * LoadSlicePointerFlag is 0
1906      */
1907     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1908     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1909     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1910     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1911
1912     ADVANCE_BCS_BATCH(batch);
1913 }
1914
1915 static int
1916 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1917                                  struct intel_encoder_context *encoder_context,
1918                                  int x, int y,
1919                                  int first_mb_in_slice,
1920                                  int last_mb_in_slice,
1921                                  int first_mb_in_slice_group,
1922                                  int last_mb_in_slice_group,
1923                                  int mb_type,
1924                                  int qp_scale_code,
1925                                  int coded_block_pattern,
1926                                  unsigned char target_size_in_word,
1927                                  unsigned char max_size_in_word,
1928                                  struct intel_batchbuffer *batch)
1929 {
1930     int len_in_dwords = 9;
1931
1932     if (batch == NULL)
1933         batch = encoder_context->base.batch;
1934
1935     BEGIN_BCS_BATCH(batch, len_in_dwords);
1936
1937     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1938     OUT_BCS_BATCH(batch,
1939                   0 << 24 |     /* PackedMvNum */
1940                   0 << 20 |     /* MvFormat */
1941                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1942                   0 << 15 |     /* TransformFlag: frame DCT */
1943                   0 << 14 |     /* FieldMbFlag */
1944                   1 << 13 |     /* IntraMbFlag */
1945                   mb_type << 8 |   /* MbType: Intra */
1946                   0 << 2 |      /* SkipMbFlag */
1947                   0 << 0 |      /* InterMbMode */
1948                   0);
1949     OUT_BCS_BATCH(batch, y << 16 | x);
1950     OUT_BCS_BATCH(batch,
1951                   max_size_in_word << 24 |
1952                   target_size_in_word << 16 |
1953                   coded_block_pattern << 0 |      /* CBP */
1954                   0);
1955     OUT_BCS_BATCH(batch,
1956                   last_mb_in_slice << 31 |
1957                   first_mb_in_slice << 30 |
1958                   0 << 27 |     /* EnableCoeffClamp */
1959                   last_mb_in_slice_group << 26 |
1960                   0 << 25 |     /* MbSkipConvDisable */
1961                   first_mb_in_slice_group << 24 |
1962                   0 << 16 |     /* MvFieldSelect */
1963                   qp_scale_code << 0 |
1964                   0);
1965     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1966     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1967     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1968     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1969
1970     ADVANCE_BCS_BATCH(batch);
1971
1972     return len_in_dwords;
1973 }
1974
1975 #define INTRA_RDO_OFFSET        4
1976 #define INTER_RDO_OFFSET        54
1977 #define INTER_MSG_OFFSET        52
1978 #define INTER_MV_OFFSET         224
1979 #define RDO_MASK                0xFFFF
1980
1981 static void
1982 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1983                                            struct encode_state *encode_state,
1984                                            struct intel_encoder_context *encoder_context,
1985                                            struct intel_batchbuffer *slice_batch)
1986 {
1987     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1988     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1989
1990     if (encode_state->packed_header_data[idx]) {
1991         VAEncPackedHeaderParameterBuffer *param = NULL;
1992         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1993         unsigned int length_in_bits;
1994
1995         assert(encode_state->packed_header_param[idx]);
1996         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1997         length_in_bits = param->bit_length;
1998
1999         mfc_context->insert_object(ctx,
2000                                    encoder_context,
2001                                    header_data,
2002                                    ALIGN(length_in_bits, 32) >> 5,
2003                                    length_in_bits & 0x1f,
2004                                    5,   /* FIXME: check it */
2005                                    0,
2006                                    0,
2007                                    !param->has_emulation_bytes,
2008                                    slice_batch);
2009     }
2010
2011     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2012
2013     if (encode_state->packed_header_data[idx]) {
2014         VAEncPackedHeaderParameterBuffer *param = NULL;
2015         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2016         unsigned int length_in_bits;
2017
2018         assert(encode_state->packed_header_param[idx]);
2019         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2020         length_in_bits = param->bit_length;
2021
2022         mfc_context->insert_object(ctx,
2023                                    encoder_context,
2024                                    header_data,
2025                                    ALIGN(length_in_bits, 32) >> 5,
2026                                    length_in_bits & 0x1f,
2027                                    5, /* FIXME: check it */
2028                                    0,
2029                                    0,
2030                                    !param->has_emulation_bytes,
2031                                    slice_batch);
2032     }
2033 }
2034
2035 static void 
2036 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2037                                      struct encode_state *encode_state,
2038                                      struct intel_encoder_context *encoder_context,
2039                                      int slice_index,
2040                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2041                                      struct intel_batchbuffer *slice_batch)
2042 {
2043     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2044     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2045     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2046     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2047     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2048     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2049     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2050     int i, j;
2051     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2052
2053     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2054     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2055     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2056     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2057
2058     if (next_slice_group_param) {
2059         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2060         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2061     } else {
2062         h_next_start_pos = 0;
2063         v_next_start_pos = height_in_mbs;
2064     }
2065
2066     gen75_mfc_mpeg2_slicegroup_state(ctx,
2067                                      encoder_context,
2068                                      h_start_pos,
2069                                      v_start_pos,
2070                                      h_next_start_pos,
2071                                      v_next_start_pos,
2072                                      slice_index == 0,
2073                                      next_slice_group_param == NULL,
2074                                      slice_param->is_intra_slice,
2075                                      slice_param->quantiser_scale_code,
2076                                      slice_batch);
2077
2078     if (slice_index == 0) 
2079         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2080
2081     /* Insert '00' to make sure the header is valid */
2082     mfc_context->insert_object(ctx,
2083                                encoder_context,
2084                                (unsigned int*)section_delimiter,
2085                                1,
2086                                8,   /* 8bits in the last DWORD */
2087                                1,   /* 1 byte */
2088                                1,
2089                                0,
2090                                0,
2091                                slice_batch);
2092
2093     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2094         /* PAK for each macroblocks */
2095         for (j = 0; j < slice_param->num_macroblocks; j++) {
2096             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2097             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2098             int first_mb_in_slice = (j == 0);
2099             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2100             int first_mb_in_slice_group = (i == 0 && j == 0);
2101             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2102                                           j == slice_param->num_macroblocks - 1);
2103
2104             if (slice_param->is_intra_slice) {
2105                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2106                                                  encoder_context,
2107                                                  h_pos, v_pos,
2108                                                  first_mb_in_slice,
2109                                                  last_mb_in_slice,
2110                                                  first_mb_in_slice_group,
2111                                                  last_mb_in_slice_group,
2112                                                  0x1a,
2113                                                  slice_param->quantiser_scale_code,
2114                                                  0x3f,
2115                                                  0,
2116                                                  0x3ff,
2117                                                  slice_batch);
2118             } else {
2119                 assert(0);
2120             }
2121         }
2122
2123         slice_param++;
2124     }
2125
2126     /* tail data */
2127     if (next_slice_group_param == NULL) { /* end of a picture */
2128         mfc_context->insert_object(ctx,
2129                                    encoder_context,
2130                                    (unsigned int *)tail_delimiter,
2131                                    2,
2132                                    8,   /* 8bits in the last DWORD */
2133                                    5,   /* 5 bytes */
2134                                    1,
2135                                    1,
2136                                    0,
2137                                    slice_batch);
2138     } else {        /* end of a lsice group */
2139         mfc_context->insert_object(ctx,
2140                                    encoder_context,
2141                                    (unsigned int *)section_delimiter,
2142                                    1,
2143                                    8,   /* 8bits in the last DWORD */
2144                                    1,   /* 1 byte */
2145                                    1,
2146                                    1,
2147                                    0,
2148                                    slice_batch);
2149     }
2150 }
2151
2152 /* 
2153  * A batch buffer for all slices, including slice state, 
2154  * slice insert object and slice pak object commands
2155  *
2156  */
2157 static dri_bo *
2158 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2159                                            struct encode_state *encode_state,
2160                                            struct intel_encoder_context *encoder_context)
2161 {
2162     struct i965_driver_data *i965 = i965_driver_data(ctx);
2163     struct intel_batchbuffer *batch;
2164     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2165     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2166     dri_bo *batch_bo;
2167     int i;
2168     int buffer_size;
2169     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2170     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2171
2172     buffer_size = width_in_mbs * height_in_mbs * 64;
2173     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2174     batch_bo = batch->buffer;
2175
2176     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2177         if (i == encode_state->num_slice_params_ext - 1)
2178             next_slice_group_param = NULL;
2179         else
2180             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2181
2182         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2183     }
2184
2185     intel_batchbuffer_align(batch, 8);
2186     
2187     BEGIN_BCS_BATCH(batch, 2);
2188     OUT_BCS_BATCH(batch, 0);
2189     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2190     ADVANCE_BCS_BATCH(batch);
2191
2192     dri_bo_reference(batch_bo);
2193     intel_batchbuffer_free(batch);
2194
2195     return batch_bo;
2196 }
2197
2198 static void
2199 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2200                                             struct encode_state *encode_state,
2201                                             struct intel_encoder_context *encoder_context)
2202 {
2203     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2204
2205     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2206     mfc_context->set_surface_state(ctx, encoder_context);
2207     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2208     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2209     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2210     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2211     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2212     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2213 }
2214
2215 static void
2216 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2217                                     struct encode_state *encode_state,
2218                                     struct intel_encoder_context *encoder_context)
2219 {
2220     struct intel_batchbuffer *batch = encoder_context->base.batch;
2221     dri_bo *slice_batch_bo;
2222
2223     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2224
2225     // begin programing
2226     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2227     intel_batchbuffer_emit_mi_flush(batch);
2228     
2229     // picture level programing
2230     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2231
2232     BEGIN_BCS_BATCH(batch, 2);
2233     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2234     OUT_BCS_RELOC(batch,
2235                   slice_batch_bo,
2236                   I915_GEM_DOMAIN_COMMAND, 0, 
2237                   0);
2238     ADVANCE_BCS_BATCH(batch);
2239
2240     // end programing
2241     intel_batchbuffer_end_atomic(batch);
2242
2243     dri_bo_unreference(slice_batch_bo);
2244 }
2245
2246 static VAStatus
2247 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2248                         struct encode_state *encode_state,
2249                         struct intel_encoder_context *encoder_context)
2250 {
2251     struct i965_driver_data *i965 = i965_driver_data(ctx);
2252     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2253     struct object_surface *obj_surface; 
2254     struct object_buffer *obj_buffer;
2255     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2256     struct i965_coded_buffer_segment *coded_buffer_segment;
2257     VAStatus vaStatus = VA_STATUS_SUCCESS;
2258     dri_bo *bo;
2259     int i;
2260
2261     /* reconstructed surface */
2262     obj_surface = SURFACE(pic_param->reconstructed_picture);
2263     assert(obj_surface);
2264     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2265     mfc_context->post_deblocking_output.bo = obj_surface->bo;
2266     dri_bo_reference(mfc_context->post_deblocking_output.bo);
2267     mfc_context->surface_state.width = obj_surface->orig_width;
2268     mfc_context->surface_state.height = obj_surface->orig_height;
2269     mfc_context->surface_state.w_pitch = obj_surface->width;
2270     mfc_context->surface_state.h_pitch = obj_surface->height;
2271
2272     /* forward reference */
2273     obj_surface = SURFACE(pic_param->forward_reference_picture);
2274
2275     if (obj_surface && obj_surface->bo) {
2276         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2277         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2278     } else
2279         mfc_context->reference_surfaces[0].bo = NULL;
2280
2281     /* backward reference */
2282     obj_surface = SURFACE(pic_param->backward_reference_picture);
2283
2284     if (obj_surface && obj_surface->bo) {
2285         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2286         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2287     } else {
2288         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2289
2290         if (mfc_context->reference_surfaces[1].bo)
2291             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2292     }
2293
2294     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2295         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2296
2297         if (mfc_context->reference_surfaces[i].bo)
2298             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2299     }
2300     
2301     /* input YUV surface */
2302     obj_surface = SURFACE(encoder_context->input_yuv_surface);
2303     assert(obj_surface && obj_surface->bo);
2304     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2305     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2306
2307     /* coded buffer */
2308     obj_buffer = BUFFER(pic_param->coded_buf);
2309     bo = obj_buffer->buffer_store->bo;
2310     assert(bo);
2311     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2312     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2313     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2314     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2315
2316     /* set the internal flag to 0 to indicate the coded size is unknown */
2317     dri_bo_map(bo, 1);
2318     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2319     coded_buffer_segment->mapped = 0;
2320     coded_buffer_segment->codec = CODED_MPEG2;
2321     dri_bo_unmap(bo);
2322
2323     return vaStatus;
2324 }
2325
2326 static VAStatus
2327 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2328                                struct encode_state *encode_state,
2329                                struct intel_encoder_context *encoder_context)
2330 {
2331     gen75_mfc_init(ctx, encode_state, encoder_context);
2332     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2333     /*Programing bcs pipeline*/
2334     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2335     gen75_mfc_run(ctx, encode_state, encoder_context);
2336
2337     return VA_STATUS_SUCCESS;
2338 }
2339
2340 static void
2341 gen75_mfc_context_destroy(void *context)
2342 {
2343     struct gen6_mfc_context *mfc_context = context;
2344     int i;
2345
2346     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2347     mfc_context->post_deblocking_output.bo = NULL;
2348
2349     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2350     mfc_context->pre_deblocking_output.bo = NULL;
2351
2352     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2353     mfc_context->uncompressed_picture_source.bo = NULL;
2354
2355     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2356     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2357
2358     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2359         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2360         mfc_context->direct_mv_buffers[i].bo = NULL;
2361     }
2362
2363     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2364     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2365
2366     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2367     mfc_context->macroblock_status_buffer.bo = NULL;
2368
2369     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2370     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2371
2372     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2373     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2374
2375
2376     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2377         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2378         mfc_context->reference_surfaces[i].bo = NULL;  
2379     }
2380
2381     i965_gpe_context_destroy(&mfc_context->gpe_context);
2382
2383     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2384     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2385
2386     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2387     mfc_context->aux_batchbuffer_surface.bo = NULL;
2388
2389     if (mfc_context->aux_batchbuffer)
2390         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2391
2392     mfc_context->aux_batchbuffer = NULL;
2393
2394     free(mfc_context);
2395 }
2396
2397 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2398                   VAProfile profile,
2399                   struct encode_state *encode_state,
2400                   struct intel_encoder_context *encoder_context)
2401 {
2402     VAStatus vaStatus;
2403
2404     switch (profile) {
2405     case VAProfileH264Baseline:
2406     case VAProfileH264Main:
2407     case VAProfileH264High:
2408         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2409         break;
2410
2411         /* FIXME: add for other profile */
2412     case VAProfileMPEG2Simple:
2413     case VAProfileMPEG2Main:
2414         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2415         break;
2416
2417     default:
2418         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2419         break;
2420     }
2421
2422     return vaStatus;
2423 }
2424
2425 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2426 {
2427     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2428
2429     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2430
2431     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2432     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2433
2434     mfc_context->gpe_context.curbe.length = 32 * 4;
2435
2436     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2437     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2438     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2439     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2440     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2441
2442     i965_gpe_load_kernels(ctx,
2443                           &mfc_context->gpe_context,
2444                           gen75_mfc_kernels,
2445                           NUM_MFC_KERNEL);
2446
2447     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2448     mfc_context->set_surface_state = gen75_mfc_surface_state;
2449     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2450     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2451     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2452     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2453     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2454     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2455
2456     encoder_context->mfc_context = mfc_context;
2457     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2458     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2459     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2460
2461     return True;
2462 }