Fix incorrect MI_BATCH_BUFFER_START command for MPEG2 encoding on BDW
[platform/upstream/libva-intel-driver.git] / src / gen8_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/*
 * Surface-state heap layout: MAX_MEDIA_SURFACES_GEN6 padded surface states
 * come first, followed by a binding table of unsigned int entries.
 *
 * Macro arguments are parenthesized so that expression arguments such as
 * SURFACE_STATE_OFFSET(i + 1) expand to the intended value.
 */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define MFC_SOFTWARE_HASWELL    1

/* Revision number of the B0 stepping; anything at or above it is "B+". */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
54
55 static const uint32_t gen8_mfc_batchbuffer_avc_intra[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
57 };
58
59 static const uint32_t gen8_mfc_batchbuffer_avc_inter[][4] = {
60 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
61 };
62
63 static struct i965_kernel gen8_mfc_kernels[] = {
64     {
65         "MFC AVC INTRA BATCHBUFFER ",
66         MFC_BATCHBUFFER_AVC_INTRA,
67         gen8_mfc_batchbuffer_avc_intra,
68         sizeof(gen8_mfc_batchbuffer_avc_intra),
69         NULL
70     },
71
72     {
73         "MFC AVC INTER BATCHBUFFER ",
74         MFC_BATCHBUFFER_AVC_INTER,
75         gen8_mfc_batchbuffer_avc_inter,
76         sizeof(gen8_mfc_batchbuffer_avc_inter),
77         NULL
78     },
79 };
80
/* Inter-prediction mode field: mask plus the partition values defined here. */
#define INTER_MODE_MASK     0x03
#define INTER_8X8           0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
/* Mask selecting bits 8..15 (sub-macroblock shape). */
#define SUBMB_SHAPE_MASK    0x00FF00

/* Values placed at bit 20 and above. */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
89
90
91 static void
92 gen8_mfc_pipe_mode_select(VADriverContextP ctx,
93                           int standard_select,
94                           struct intel_encoder_context *encoder_context)
95 {
96     struct intel_batchbuffer *batch = encoder_context->base.batch;
97     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
98
99     assert(standard_select == MFX_FORMAT_MPEG2 ||
100            standard_select == MFX_FORMAT_AVC);
101
102     BEGIN_BCS_BATCH(batch, 5);
103
104     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
105     OUT_BCS_BATCH(batch,
106                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
107                   (MFD_MODE_VLD << 15) | /* VLD mode */
108                   (0 << 10) | /* Stream-Out Enable */
109                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
110                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
111                   (0 << 5)  | /* not in stitch mode */
112                   (1 << 4)  | /* encoding mode */
113                   (standard_select << 0));  /* standard select: avc or mpeg2 */
114     OUT_BCS_BATCH(batch,
115                   (0 << 7)  | /* expand NOA bus flag */
116                   (0 << 6)  | /* disable slice-level clock gating */
117                   (0 << 5)  | /* disable clock gating for NOA */
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0);
124     OUT_BCS_BATCH(batch, 0);
125
126     ADVANCE_BCS_BATCH(batch);
127 }
128
129 static void
130 gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
131 {
132     struct intel_batchbuffer *batch = encoder_context->base.batch;
133     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
134
135     BEGIN_BCS_BATCH(batch, 6);
136
137     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
138     OUT_BCS_BATCH(batch, 0);
139     OUT_BCS_BATCH(batch,
140                   ((mfc_context->surface_state.height - 1) << 18) |
141                   ((mfc_context->surface_state.width - 1) << 4));
142     OUT_BCS_BATCH(batch,
143                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
144                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
145                   (0 << 22) | /* surface object control state, FIXME??? */
146                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
147                   (0 << 2)  | /* must be 0 for interleave U/V */
148                   (1 << 1)  | /* must be tiled */
149                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
150     OUT_BCS_BATCH(batch,
151                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
152                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
153     OUT_BCS_BATCH(batch, 0);
154
155     ADVANCE_BCS_BATCH(batch);
156 }
157
158 static void
159 gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
160                                  struct intel_encoder_context *encoder_context)
161 {
162     struct intel_batchbuffer *batch = encoder_context->base.batch;
163     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
164     struct gen6_vme_context *vme_context = encoder_context->vme_context;
165
166     BEGIN_BCS_BATCH(batch, 26);
167
168     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
169     /* the DW1-3 is for the MFX indirect bistream offset */
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172     OUT_BCS_BATCH(batch, 0);
173     /* the DW4-5 is the MFX upper bound */
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176
177     /* the DW6-10 is for MFX Indirect MV Object Base Address */
178     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
179     OUT_BCS_BATCH(batch, 0);
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
182     OUT_BCS_BATCH(batch, 0);
183
184     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
185     OUT_BCS_BATCH(batch, 0);
186     OUT_BCS_BATCH(batch, 0);
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190
191     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
192     OUT_BCS_BATCH(batch, 0);
193     OUT_BCS_BATCH(batch, 0);
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     OUT_BCS_BATCH(batch, 0);
197
198     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
199     OUT_BCS_RELOC(batch,
200                   mfc_context->mfc_indirect_pak_bse_object.bo,
201                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                   0);
203     OUT_BCS_BATCH(batch, 0);
204     OUT_BCS_BATCH(batch, 0);
205         
206     OUT_BCS_RELOC(batch,
207                   mfc_context->mfc_indirect_pak_bse_object.bo,
208                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
209                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
210     OUT_BCS_BATCH(batch, 0);
211
212     ADVANCE_BCS_BATCH(batch);
213 }
214
215 static void
216 gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
217                        struct intel_encoder_context *encoder_context)
218 {
219     struct intel_batchbuffer *batch = encoder_context->base.batch;
220     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
221     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
222
223     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
224     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
225
226     BEGIN_BCS_BATCH(batch, 16);
227
228     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
229     /*DW1. MB setting of frame */
230     OUT_BCS_BATCH(batch,
231                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
232     OUT_BCS_BATCH(batch, 
233                   ((height_in_mbs - 1) << 16) | 
234                   ((width_in_mbs - 1) << 0));
235     /* DW3 QP setting */
236     OUT_BCS_BATCH(batch, 
237                   (0 << 24) |   /* Second Chroma QP Offset */
238                   (0 << 16) |   /* Chroma QP Offset */
239                   (0 << 14) |   /* Max-bit conformance Intra flag */
240                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
241                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
242                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
243                   (0 << 8)  |   /* FIXME: Image Structure */
244                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
245     OUT_BCS_BATCH(batch,
246                   (0 << 16) |   /* Mininum Frame size */
247                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
248                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
249                   (0 << 13) |   /* CABAC 0 word insertion test enable */
250                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
251                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
252                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
253                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
254                   (0 << 6)  |   /* Only valid for VLD decoding mode */
255                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
256                   (0 << 4)  |   /* Direct 8x8 inference flag */
257                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
258                   (1 << 2)  |   /* Frame MB only flag */
259                   (0 << 1)  |   /* MBAFF mode is in active */
260                   (0 << 0));    /* Field picture flag */
261     /* DW5 Trellis quantization */
262     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
263     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
264                   (0xBB8 << 16) |       /* InterMbMaxSz */
265                   (0xEE8) );            /* IntraMbMaxSz */
266     OUT_BCS_BATCH(batch, 0);            /* Reserved */
267     /* DW8. QP delta */
268     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
269     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
270     /* DW10. Bit setting for MB */
271     OUT_BCS_BATCH(batch, 0x8C000000);
272     OUT_BCS_BATCH(batch, 0x00010000);
273     /* DW12. */
274     OUT_BCS_BATCH(batch, 0);
275     OUT_BCS_BATCH(batch, 0x02010100);
276     /* DW14. For short format */
277     OUT_BCS_BATCH(batch, 0);
278     OUT_BCS_BATCH(batch, 0);
279
280     ADVANCE_BCS_BATCH(batch);
281 }
282
283 static void
284 gen8_mfc_qm_state(VADriverContextP ctx,
285                   int qm_type,
286                   unsigned int *qm,
287                   int qm_length,
288                   struct intel_encoder_context *encoder_context)
289 {
290     struct intel_batchbuffer *batch = encoder_context->base.batch;
291     unsigned int qm_buffer[16];
292
293     assert(qm_length <= 16);
294     assert(sizeof(*qm) == 4);
295     memcpy(qm_buffer, qm, qm_length * 4);
296
297     BEGIN_BCS_BATCH(batch, 18);
298     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
299     OUT_BCS_BATCH(batch, qm_type << 0);
300     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
301     ADVANCE_BCS_BATCH(batch);
302 }
303
304 static void
305 gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
306 {
307     unsigned int qm[16] = {
308         0x10101010, 0x10101010, 0x10101010, 0x10101010,
309         0x10101010, 0x10101010, 0x10101010, 0x10101010,
310         0x10101010, 0x10101010, 0x10101010, 0x10101010,
311         0x10101010, 0x10101010, 0x10101010, 0x10101010
312     };
313
314     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
315     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
316     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
317     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
318 }
319
320 static void
321 gen8_mfc_fqm_state(VADriverContextP ctx,
322                    int fqm_type,
323                    unsigned int *fqm,
324                    int fqm_length,
325                    struct intel_encoder_context *encoder_context)
326 {
327     struct intel_batchbuffer *batch = encoder_context->base.batch;
328     unsigned int fqm_buffer[32];
329
330     assert(fqm_length <= 32);
331     assert(sizeof(*fqm) == 4);
332     memcpy(fqm_buffer, fqm, fqm_length * 4);
333
334     BEGIN_BCS_BATCH(batch, 34);
335     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
336     OUT_BCS_BATCH(batch, fqm_type << 0);
337     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
338     ADVANCE_BCS_BATCH(batch);
339 }
340
341 static void
342 gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
343 {
344     unsigned int qm[32] = {
345         0x10001000, 0x10001000, 0x10001000, 0x10001000,
346         0x10001000, 0x10001000, 0x10001000, 0x10001000,
347         0x10001000, 0x10001000, 0x10001000, 0x10001000,
348         0x10001000, 0x10001000, 0x10001000, 0x10001000,
349         0x10001000, 0x10001000, 0x10001000, 0x10001000,
350         0x10001000, 0x10001000, 0x10001000, 0x10001000,
351         0x10001000, 0x10001000, 0x10001000, 0x10001000,
352         0x10001000, 0x10001000, 0x10001000, 0x10001000
353     };
354
355     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
356     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
357     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
358     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
359 }
360
361 static void
362 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
363                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
364                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
365                            struct intel_batchbuffer *batch)
366 {
367     if (batch == NULL)
368         batch = encoder_context->base.batch;
369
370     if (data_bits_in_last_dw == 0)
371         data_bits_in_last_dw = 32;
372
373     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
374
375     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
376     OUT_BCS_BATCH(batch,
377                   (0 << 16) |   /* always start at offset 0 */
378                   (data_bits_in_last_dw << 8) |
379                   (skip_emul_byte_count << 4) |
380                   (!!emulation_flag << 3) |
381                   ((!!is_last_header) << 2) |
382                   ((!!is_end_of_slice) << 1) |
383                   (0 << 0));    /* FIXME: ??? */
384     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
385
386     ADVANCE_BCS_BATCH(batch);
387 }
388
389
390 static void gen8_mfc_init(VADriverContextP ctx,
391                           struct encode_state *encode_state,
392                           struct intel_encoder_context *encoder_context)
393 {
394     struct i965_driver_data *i965 = i965_driver_data(ctx);
395     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
396     dri_bo *bo;
397     int i;
398     int width_in_mbs = 0;
399     int height_in_mbs = 0;
400
401     if (encoder_context->codec == CODEC_H264) {
402         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
403         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
404         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
405     } else {
406         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
407
408         assert(encoder_context->codec == CODEC_MPEG2);
409
410         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
411         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
412     }
413
414     /*Encode common setup for MFC*/
415     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
416     mfc_context->post_deblocking_output.bo = NULL;
417
418     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
419     mfc_context->pre_deblocking_output.bo = NULL;
420
421     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
422     mfc_context->uncompressed_picture_source.bo = NULL;
423
424     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
425     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
426
427     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
428         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
429         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
430         mfc_context->direct_mv_buffers[i].bo = NULL;
431     }
432
433     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
434         if (mfc_context->reference_surfaces[i].bo != NULL)
435             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
436         mfc_context->reference_surfaces[i].bo = NULL;  
437     }
438
439     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
440     bo = dri_bo_alloc(i965->intel.bufmgr,
441                       "Buffer",
442                       width_in_mbs * 64,
443                       64);
444     assert(bo);
445     mfc_context->intra_row_store_scratch_buffer.bo = bo;
446
447     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
448     bo = dri_bo_alloc(i965->intel.bufmgr,
449                       "Buffer",
450                       width_in_mbs * height_in_mbs * 16,
451                       64);
452     assert(bo);
453     mfc_context->macroblock_status_buffer.bo = bo;
454
455     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
456     bo = dri_bo_alloc(i965->intel.bufmgr,
457                       "Buffer",
458                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
459                       64);
460     assert(bo);
461     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
462
463     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
464     bo = dri_bo_alloc(i965->intel.bufmgr,
465                       "Buffer",
466                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
467                       0x1000);
468     assert(bo);
469     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
470
471     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
472     mfc_context->mfc_batchbuffer_surface.bo = NULL;
473
474     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
475     mfc_context->aux_batchbuffer_surface.bo = NULL;
476
477     if (mfc_context->aux_batchbuffer)
478         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
479
480     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
481     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
482     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
483     mfc_context->aux_batchbuffer_surface.pitch = 16;
484     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
485     mfc_context->aux_batchbuffer_surface.size_block = 16;
486
487     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
488 }
489
490 static void
491 gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
492                              struct intel_encoder_context *encoder_context)
493 {
494     struct intel_batchbuffer *batch = encoder_context->base.batch;
495     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
496     int i;
497
498     BEGIN_BCS_BATCH(batch, 61);
499
500     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
501
502     /* the DW1-3 is for pre_deblocking */
503     if (mfc_context->pre_deblocking_output.bo)
504         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
505                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
506                       0);
507     else
508         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
509
510     OUT_BCS_BATCH(batch, 0);
511     OUT_BCS_BATCH(batch, 0);
512     /* the DW4-6 is for the post_deblocking */
513
514     if (mfc_context->post_deblocking_output.bo)
515         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
516                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
517                       0);                                                                                       /* post output addr  */ 
518     else
519         OUT_BCS_BATCH(batch, 0);
520     
521     OUT_BCS_BATCH(batch, 0);
522     OUT_BCS_BATCH(batch, 0);
523
524     /* the DW7-9 is for the uncompressed_picture */
525     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
526                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
527                   0); /* uncompressed data */
528
529     OUT_BCS_BATCH(batch, 0);
530     OUT_BCS_BATCH(batch, 0);
531
532     /* the DW10-12 is for the mb status */
533     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
534                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
535                   0); /* StreamOut data*/
536     
537     OUT_BCS_BATCH(batch, 0);
538     OUT_BCS_BATCH(batch, 0);
539
540     /* the DW13-15 is for the intra_row_store_scratch */
541     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
542                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
543                   0);   
544
545     OUT_BCS_BATCH(batch, 0);
546     OUT_BCS_BATCH(batch, 0);
547
548     /* the DW16-18 is for the deblocking filter */
549     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
550                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
551                   0);
552
553     OUT_BCS_BATCH(batch, 0);
554     OUT_BCS_BATCH(batch, 0);
555
556     /* the DW 19-50 is for Reference pictures*/
557     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
558         if ( mfc_context->reference_surfaces[i].bo != NULL) {
559             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
560                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
561                           0);                   
562         } else {
563             OUT_BCS_BATCH(batch, 0);
564         }
565
566         OUT_BCS_BATCH(batch, 0);
567     }
568
569     OUT_BCS_BATCH(batch, 0);
570
571     /* The DW 52-54 is for the MB status buffer */
572     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
573                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
574                   0);                                                                                   /* Macroblock status buffer*/
575         
576     OUT_BCS_BATCH(batch, 0);
577     OUT_BCS_BATCH(batch, 0);
578
579     /* the DW 55-57 is the ILDB buffer */
580     OUT_BCS_BATCH(batch, 0);
581     OUT_BCS_BATCH(batch, 0);
582     OUT_BCS_BATCH(batch, 0);
583
584     /* the DW 58-60 is the second ILDB buffer */
585     OUT_BCS_BATCH(batch, 0);
586     OUT_BCS_BATCH(batch, 0);
587     OUT_BCS_BATCH(batch, 0);
588
589     ADVANCE_BCS_BATCH(batch);
590 }
591
592 static void
593 gen8_mfc_avc_directmode_state(VADriverContextP ctx,
594                               struct intel_encoder_context *encoder_context)
595 {
596     struct intel_batchbuffer *batch = encoder_context->base.batch;
597     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
598
599     int i;
600
601     BEGIN_BCS_BATCH(batch, 71);
602
603     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
604
605     /* Reference frames and Current frames */
606     /* the DW1-32 is for the direct MV for reference */
607     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
608         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
609             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
610                           I915_GEM_DOMAIN_INSTRUCTION, 0,
611                           0);
612             OUT_BCS_BATCH(batch, 0);
613         } else {
614             OUT_BCS_BATCH(batch, 0);
615             OUT_BCS_BATCH(batch, 0);
616         }
617     }
618     
619     OUT_BCS_BATCH(batch, 0);
620
621     /* the DW34-36 is the MV for the current reference */
622     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
623                   I915_GEM_DOMAIN_INSTRUCTION, 0,
624                   0);
625
626     OUT_BCS_BATCH(batch, 0);
627     OUT_BCS_BATCH(batch, 0);
628
629     /* POL list */
630     for(i = 0; i < 32; i++) {
631         OUT_BCS_BATCH(batch, i/2);
632     }
633     OUT_BCS_BATCH(batch, 0);
634     OUT_BCS_BATCH(batch, 0);
635
636     ADVANCE_BCS_BATCH(batch);
637 }
638
639
640 static void
641 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
642                                  struct intel_encoder_context *encoder_context)
643 {
644     struct intel_batchbuffer *batch = encoder_context->base.batch;
645     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
646
647     BEGIN_BCS_BATCH(batch, 10);
648
649     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
650     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
651                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
652                   0);
653     OUT_BCS_BATCH(batch, 0);
654     OUT_BCS_BATCH(batch, 0);
655         
656     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
657     OUT_BCS_BATCH(batch, 0);
658     OUT_BCS_BATCH(batch, 0);
659     OUT_BCS_BATCH(batch, 0);
660
661     /* the DW7-9 is for Bitplane Read Buffer Base Address */
662     OUT_BCS_BATCH(batch, 0);
663     OUT_BCS_BATCH(batch, 0);
664     OUT_BCS_BATCH(batch, 0);
665
666     ADVANCE_BCS_BATCH(batch);
667 }
668
669
670 static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
671                                                       struct encode_state *encode_state,
672                                                       struct intel_encoder_context *encoder_context)
673 {
674     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
675
676     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
677     mfc_context->set_surface_state(ctx, encoder_context);
678     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
679     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
680     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
681     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
682     mfc_context->avc_qm_state(ctx, encoder_context);
683     mfc_context->avc_fqm_state(ctx, encoder_context);
684     gen8_mfc_avc_directmode_state(ctx, encoder_context); 
685     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
686 }
687
688
689 static VAStatus gen8_mfc_run(VADriverContextP ctx, 
690                              struct encode_state *encode_state,
691                              struct intel_encoder_context *encoder_context)
692 {
693     struct intel_batchbuffer *batch = encoder_context->base.batch;
694
695     intel_batchbuffer_flush(batch);             //run the pipeline
696
697     return VA_STATUS_SUCCESS;
698 }
699
700
701 static VAStatus
702 gen8_mfc_stop(VADriverContextP ctx, 
703               struct encode_state *encode_state,
704               struct intel_encoder_context *encoder_context,
705               int *encoded_bits_size)
706 {
707     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
708     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
709     VACodedBufferSegment *coded_buffer_segment;
710     
711     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
712     assert(vaStatus == VA_STATUS_SUCCESS);
713     *encoded_bits_size = coded_buffer_segment->size * 8;
714     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
715
716     return VA_STATUS_SUCCESS;
717 }
718
719
720 static void
721 gen8_mfc_avc_slice_state(VADriverContextP ctx,
722                          VAEncPictureParameterBufferH264 *pic_param,
723                          VAEncSliceParameterBufferH264 *slice_param,
724                          struct encode_state *encode_state,
725                          struct intel_encoder_context *encoder_context,
726                          int rate_control_enable,
727                          int qp,
728                          struct intel_batchbuffer *batch)
729 {
730     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
731     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
732     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
733     int beginmb = slice_param->macroblock_address;
734     int endmb = beginmb + slice_param->num_macroblocks;
735     int beginx = beginmb % width_in_mbs;
736     int beginy = beginmb / width_in_mbs;
737     int nextx =  endmb % width_in_mbs;
738     int nexty = endmb / width_in_mbs;
739     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
740     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
741     int maxQpN, maxQpP;
742     unsigned char correct[6], grow, shrink;
743     int i;
744     int weighted_pred_idc = 0;
745     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
746     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
747     int num_ref_l0 = 0, num_ref_l1 = 0;
748
749     if (batch == NULL)
750         batch = encoder_context->base.batch;
751
752     if (slice_type == SLICE_TYPE_I) {
753         luma_log2_weight_denom = 0;
754         chroma_log2_weight_denom = 0;
755     } else if (slice_type == SLICE_TYPE_P) {
756         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
757         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
758
759         if (slice_param->num_ref_idx_active_override_flag)
760             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
761     } else if (slice_type == SLICE_TYPE_B) {
762         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
763         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
764         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
765
766         if (slice_param->num_ref_idx_active_override_flag) {
767             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
768             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
769         }
770
771         if (weighted_pred_idc == 2) {
772             /* 8.4.3 - Derivation process for prediction weights (8-279) */
773             luma_log2_weight_denom = 5;
774             chroma_log2_weight_denom = 5;
775         }
776     }
777
778     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
779     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
780
781     for (i = 0; i < 6; i++)
782         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
783
784     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
785         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
786     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
787         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
788
789     BEGIN_BCS_BATCH(batch, 11);;
790
791     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
792     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
793
794     OUT_BCS_BATCH(batch,
795                   (num_ref_l0 << 16) |
796                   (num_ref_l1 << 24) |
797                   (chroma_log2_weight_denom << 8) |
798                   (luma_log2_weight_denom << 0));
799
800     OUT_BCS_BATCH(batch, 
801                   (weighted_pred_idc << 30) |
802                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
803                   (slice_param->disable_deblocking_filter_idc << 27) |
804                   (slice_param->cabac_init_idc << 24) |
805                   (qp<<16) |                    /*Slice Quantization Parameter*/
806                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
807                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
808     OUT_BCS_BATCH(batch,
809                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
810                   (beginx << 16) |
811                   slice_param->macroblock_address );
812     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
813     OUT_BCS_BATCH(batch, 
814                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
815                   (1 << 30) |           /*ResetRateControlCounter*/
816                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
817                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
818                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
819                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
820                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
821                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
822                   (last_slice << 19) |     /*IsLastSlice*/
823                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
824                   (1 << 17) |       /*HeaderPresentFlag*/       
825                   (1 << 16) |       /*SliceData PresentFlag*/
826                   (1 << 15) |       /*TailPresentFlag*/
827                   (1 << 13) |       /*RBSP NAL TYPE*/   
828                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
829     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
830     OUT_BCS_BATCH(batch,
831                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
832                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
833                   (shrink << 8)  |
834                   (grow << 0));   
835     OUT_BCS_BATCH(batch,
836                   (correct[5] << 20) |
837                   (correct[4] << 16) |
838                   (correct[3] << 12) |
839                   (correct[2] << 8) |
840                   (correct[1] << 4) |
841                   (correct[0] << 0));
842     OUT_BCS_BATCH(batch, 0);
843
844     ADVANCE_BCS_BATCH(batch);
845 }
846
847
848 #ifdef MFC_SOFTWARE_HASWELL
849
850 static int
851 gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
852                               int qp,unsigned int *msg,
853                               struct intel_encoder_context *encoder_context,
854                               unsigned char target_mb_size, unsigned char max_mb_size,
855                               struct intel_batchbuffer *batch)
856 {
857     int len_in_dwords = 12;
858     unsigned int intra_msg;
859 #define         INTRA_MSG_FLAG          (1 << 13)
860 #define         INTRA_MBTYPE_MASK       (0x1F0000)
861     if (batch == NULL)
862         batch = encoder_context->base.batch;
863
864     BEGIN_BCS_BATCH(batch, len_in_dwords);
865
866     intra_msg = msg[0] & 0xC0FF;
867     intra_msg |= INTRA_MSG_FLAG;
868     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
869     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
870     OUT_BCS_BATCH(batch, 0);
871     OUT_BCS_BATCH(batch, 0);
872     OUT_BCS_BATCH(batch, 
873                   (0 << 24) |           /* PackedMvNum, Debug*/
874                   (0 << 20) |           /* No motion vector */
875                   (1 << 19) |           /* CbpDcY */
876                   (1 << 18) |           /* CbpDcU */
877                   (1 << 17) |           /* CbpDcV */
878                   intra_msg);
879
880     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
881     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
882     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
883
884     /*Stuff for Intra MB*/
885     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
886     OUT_BCS_BATCH(batch, msg[2]);       
887     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
888     
889     /*MaxSizeInWord and TargetSzieInWord*/
890     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
891                   (target_mb_size << 16) );
892
893     OUT_BCS_BATCH(batch, 0);
894
895     ADVANCE_BCS_BATCH(batch);
896
897     return len_in_dwords;
898 }
899
900 static int
901 gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
902                               unsigned int *msg, unsigned int offset,
903                               struct intel_encoder_context *encoder_context,
904                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
905                               struct intel_batchbuffer *batch)
906 {
907     struct gen6_vme_context *vme_context = encoder_context->vme_context;
908     int len_in_dwords = 12;
909     unsigned int inter_msg = 0;
910     if (batch == NULL)
911         batch = encoder_context->base.batch;
912     {
913 #define MSG_MV_OFFSET   4
914         unsigned int *mv_ptr;
915         mv_ptr = msg + MSG_MV_OFFSET;
916         /* MV of VME output is based on 16 sub-blocks. So it is necessary
917          * to convert them to be compatible with the format of AVC_PAK
918          * command.
919          */
920         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
921             /* MV[0] and MV[2] are replicated */
922             mv_ptr[4] = mv_ptr[0];
923             mv_ptr[5] = mv_ptr[1];
924             mv_ptr[2] = mv_ptr[8];
925             mv_ptr[3] = mv_ptr[9];
926             mv_ptr[6] = mv_ptr[8];
927             mv_ptr[7] = mv_ptr[9];
928         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
929             /* MV[0] and MV[1] are replicated */
930             mv_ptr[2] = mv_ptr[0];
931             mv_ptr[3] = mv_ptr[1];
932             mv_ptr[4] = mv_ptr[16];
933             mv_ptr[5] = mv_ptr[17];
934             mv_ptr[6] = mv_ptr[24];
935             mv_ptr[7] = mv_ptr[25];
936         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
937                    !(msg[1] & SUBMB_SHAPE_MASK)) {
938             /* Don't touch MV[0] or MV[1] */
939             mv_ptr[2] = mv_ptr[8];
940             mv_ptr[3] = mv_ptr[9];
941             mv_ptr[4] = mv_ptr[16];
942             mv_ptr[5] = mv_ptr[17];
943             mv_ptr[6] = mv_ptr[24];
944             mv_ptr[7] = mv_ptr[25];
945         }
946     }
947
948     BEGIN_BCS_BATCH(batch, len_in_dwords);
949
950     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
951
952     inter_msg = 32;
953     /* MV quantity */
954     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
955         if (msg[1] & SUBMB_SHAPE_MASK)
956             inter_msg = 128;
957     }
958     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
959     OUT_BCS_BATCH(batch, offset);
960     inter_msg = msg[0] & (0x1F00FFFF);
961     inter_msg |= INTER_MV8;
962     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
963     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
964         (msg[1] & SUBMB_SHAPE_MASK)) {
965         inter_msg |= INTER_MV32;
966     }
967
968     OUT_BCS_BATCH(batch, inter_msg);
969
970     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
971     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
972 #if 0 
973     if ( slice_type == SLICE_TYPE_B) {
974         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
975     } else {
976         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
977     }
978 #else
979     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
980 #endif
981
982     inter_msg = msg[1] >> 8;
983     /*Stuff for Inter MB*/
984     OUT_BCS_BATCH(batch, inter_msg);        
985     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
986     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
987
988     /*MaxSizeInWord and TargetSzieInWord*/
989     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
990                   (target_mb_size << 16) );
991
992     OUT_BCS_BATCH(batch, 0x0);    
993
994     ADVANCE_BCS_BATCH(batch);
995
996     return len_in_dwords;
997 }
998
/*
 * Layout of one macroblock record in the VME output buffer, as consumed by
 * gen8_mfc_avc_pipeline_slice_programing().
 */
/* Dword index of the intra cost; only the AVC_RDO_MASK bits are compared. */
#define         AVC_INTRA_RDO_OFFSET    4
/* Dword index of the inter cost; only the AVC_RDO_MASK bits are compared. */
#define         AVC_INTER_RDO_OFFSET    10
/* Dwords skipped in the record before it is handed to the inter PAK object. */
#define         AVC_INTER_MSG_OFFSET    8       
/* Added to the record's byte offset to form the MV offset passed to the inter PAK object. */
#define         AVC_INTER_MV_OFFSET             48
/* Mask applied to both cost dwords before they are compared. */
#define         AVC_RDO_MASK            0xFFFF
1004
1005 static void 
1006 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1007                                        struct encode_state *encode_state,
1008                                        struct intel_encoder_context *encoder_context,
1009                                        int slice_index,
1010                                        struct intel_batchbuffer *slice_batch)
1011 {
1012     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1013     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1014     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1015     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1016     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1017     unsigned int *msg = NULL, offset = 0;
1018     unsigned char *msg_ptr = NULL;
1019     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1020     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1021     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1022     int i,x,y;
1023     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1024     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1025     unsigned char *slice_header = NULL;
1026     int slice_header_length_in_bits = 0;
1027     unsigned int tail_data[] = { 0x0, 0x0 };
1028     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1029     int is_intra = slice_type == SLICE_TYPE_I;
1030
1031
1032     if (rate_control_mode == VA_RC_CBR) {
1033         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1034         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1035     }
1036
1037     /* only support for 8-bit pixel bit-depth */
1038     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1039     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1040     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1041     assert(qp >= 0 && qp < 52);
1042
1043     gen8_mfc_avc_slice_state(ctx, 
1044                              pPicParameter,
1045                              pSliceParameter,
1046                              encode_state, encoder_context,
1047                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1048
1049     if ( slice_index == 0) 
1050         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1051
1052     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1053
1054     // slice hander
1055     mfc_context->insert_object(ctx, encoder_context,
1056                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1057                                5,  /* first 5 bytes are start code + nal unit type */
1058                                1, 0, 1, slice_batch);
1059
1060     dri_bo_map(vme_context->vme_output.bo , 1);
1061     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1062
1063     if (is_intra) {
1064         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1065     } else {
1066         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1067     }
1068    
1069     for (i = pSliceParameter->macroblock_address; 
1070          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1071         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1072         x = i % width_in_mbs;
1073         y = i / width_in_mbs;
1074         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1075
1076         if (is_intra) {
1077             assert(msg);
1078             gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1079         } else {
1080             int inter_rdo, intra_rdo;
1081             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1082             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1083             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1084             if (intra_rdo < inter_rdo) { 
1085                 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1086             } else {
1087                 msg += AVC_INTER_MSG_OFFSET;
1088                 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1089             }
1090         }
1091     }
1092    
1093     dri_bo_unmap(vme_context->vme_output.bo);
1094
1095     if ( last_slice ) {    
1096         mfc_context->insert_object(ctx, encoder_context,
1097                                    tail_data, 2, 8,
1098                                    2, 1, 1, 0, slice_batch);
1099     } else {
1100         mfc_context->insert_object(ctx, encoder_context,
1101                                    tail_data, 1, 8,
1102                                    1, 1, 1, 0, slice_batch);
1103     }
1104
1105     free(slice_header);
1106
1107 }
1108
1109 static dri_bo *
1110 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1111                                   struct encode_state *encode_state,
1112                                   struct intel_encoder_context *encoder_context)
1113 {
1114     struct i965_driver_data *i965 = i965_driver_data(ctx);
1115     struct intel_batchbuffer *batch;
1116     dri_bo *batch_bo;
1117     int i;
1118     int buffer_size;
1119     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1120     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1121     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1122
1123     buffer_size = width_in_mbs * height_in_mbs * 64;
1124     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1125     batch_bo = batch->buffer;
1126     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1127         gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1128     }
1129
1130     intel_batchbuffer_align(batch, 8);
1131     
1132     BEGIN_BCS_BATCH(batch, 2);
1133     OUT_BCS_BATCH(batch, 0);
1134     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1135     ADVANCE_BCS_BATCH(batch);
1136
1137     dri_bo_reference(batch_bo);
1138     intel_batchbuffer_free(batch);
1139
1140     return batch_bo;
1141 }
1142
1143 #else
1144
1145 static void
1146 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1147                                     struct encode_state *encode_state,
1148                                     struct intel_encoder_context *encoder_context)
1149
1150 {
1151     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1152     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1153
1154     assert(vme_context->vme_output.bo);
1155     mfc_context->buffer_suface_setup(ctx,
1156                                      &mfc_context->gpe_context,
1157                                      &vme_context->vme_output,
1158                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1159                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1160     assert(mfc_context->aux_batchbuffer_surface.bo);
1161     mfc_context->buffer_suface_setup(ctx,
1162                                      &mfc_context->gpe_context,
1163                                      &mfc_context->aux_batchbuffer_surface,
1164                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1165                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1166 }
1167
1168 static void
1169 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1170                                      struct encode_state *encode_state,
1171                                      struct intel_encoder_context *encoder_context)
1172
1173 {
1174     struct i965_driver_data *i965 = i965_driver_data(ctx);
1175     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1176     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1177     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1178     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1179     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1180     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1181     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1182     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1183                                                            "MFC batchbuffer",
1184                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1185                                                            0x1000);
1186     mfc_context->buffer_suface_setup(ctx,
1187                                      &mfc_context->gpe_context,
1188                                      &mfc_context->mfc_batchbuffer_surface,
1189                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1190                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1191 }
1192
/*
 * Set up every surface used by the batchbuffer-building media kernels:
 * first the inputs (VME output and auxiliary batchbuffer), then the output
 * surface, which is allocated as part of its setup.
 */
static void
gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)
{
    gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
}
1201
1202 static void
1203 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1204                                 struct encode_state *encode_state,
1205                                 struct intel_encoder_context *encoder_context)
1206 {
1207     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1208     struct gen6_interface_descriptor_data *desc;   
1209     int i;
1210     dri_bo *bo;
1211
1212     bo = mfc_context->gpe_context.idrt.bo;
1213     dri_bo_map(bo, 1);
1214     assert(bo->virtual);
1215     desc = bo->virtual;
1216
1217     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1218         struct i965_kernel *kernel;
1219
1220         kernel = &mfc_context->gpe_context.kernels[i];
1221         assert(sizeof(*desc) == 32);
1222
1223         /*Setup the descritor table*/
1224         memset(desc, 0, sizeof(*desc));
1225         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1226         desc->desc2.sampler_count = 0;
1227         desc->desc2.sampler_state_pointer = 0;
1228         desc->desc3.binding_table_entry_count = 2;
1229         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1230         desc->desc4.constant_urb_entry_read_offset = 0;
1231         desc->desc4.constant_urb_entry_read_length = 4;
1232                 
1233         /*kernel start*/
1234         dri_bo_emit_reloc(bo,   
1235                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1236                           0,
1237                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1238                           kernel->bo);
1239         desc++;
1240     }
1241
1242     dri_bo_unmap(bo);
1243 }
1244
/*
 * Constant buffer setup for the batchbuffer-building media kernels.
 * Currently a no-op: nothing is written, the MFC context is only fetched.
 */
static void
gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    
    /* Silences the unused-variable warning. */
    (void)mfc_context;
}
1254
1255 static void
1256 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1257                                          int index,
1258                                          int head_offset,
1259                                          int batchbuffer_offset,
1260                                          int head_size,
1261                                          int tail_size,
1262                                          int number_mb_cmds,
1263                                          int first_object,
1264                                          int last_object,
1265                                          int last_slice,
1266                                          int mb_x,
1267                                          int mb_y,
1268                                          int width_in_mbs,
1269                                          int qp)
1270 {
1271     BEGIN_BATCH(batch, 12);
1272     
1273     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1274     OUT_BATCH(batch, index);
1275     OUT_BATCH(batch, 0);
1276     OUT_BATCH(batch, 0);
1277     OUT_BATCH(batch, 0);
1278     OUT_BATCH(batch, 0);
1279    
1280     /*inline data */
1281     OUT_BATCH(batch, head_offset);
1282     OUT_BATCH(batch, batchbuffer_offset);
1283     OUT_BATCH(batch, 
1284               head_size << 16 |
1285               tail_size);
1286     OUT_BATCH(batch,
1287               number_mb_cmds << 16 |
1288               first_object << 2 |
1289               last_object << 1 |
1290               last_slice);
1291     OUT_BATCH(batch,
1292               mb_y << 8 |
1293               mb_x);
1294     OUT_BATCH(batch,
1295               qp << 16 |
1296               width_in_mbs);
1297
1298     ADVANCE_BATCH(batch);
1299 }
1300
1301 static void
1302 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1303                                        struct intel_encoder_context *encoder_context,
1304                                        VAEncSliceParameterBufferH264 *slice_param,
1305                                        int head_offset,
1306                                        unsigned short head_size,
1307                                        unsigned short tail_size,
1308                                        int batchbuffer_offset,
1309                                        int qp,
1310                                        int last_slice)
1311 {
1312     struct intel_batchbuffer *batch = encoder_context->base.batch;
1313     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1314     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1315     int total_mbs = slice_param->num_macroblocks;
1316     int number_mb_cmds = 128;
1317     int starting_mb = 0;
1318     int last_object = 0;
1319     int first_object = 1;
1320     int i;
1321     int mb_x, mb_y;
1322     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1323
1324     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1325         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1326         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1327         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1328         assert(mb_x <= 255 && mb_y <= 255);
1329
1330         starting_mb += number_mb_cmds;
1331
1332         gen8_mfc_batchbuffer_emit_object_command(batch,
1333                                                  index,
1334                                                  head_offset,
1335                                                  batchbuffer_offset,
1336                                                  head_size,
1337                                                  tail_size,
1338                                                  number_mb_cmds,
1339                                                  first_object,
1340                                                  last_object,
1341                                                  last_slice,
1342                                                  mb_x,
1343                                                  mb_y,
1344                                                  width_in_mbs,
1345                                                  qp);
1346
1347         if (first_object) {
1348             head_offset += head_size;
1349             batchbuffer_offset += head_size;
1350         }
1351
1352         if (last_object) {
1353             head_offset += tail_size;
1354             batchbuffer_offset += tail_size;
1355         }
1356
1357         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1358
1359         first_object = 0;
1360     }
1361
1362     if (!last_object) {
1363         last_object = 1;
1364         number_mb_cmds = total_mbs % number_mb_cmds;
1365         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1366         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1367         assert(mb_x <= 255 && mb_y <= 255);
1368         starting_mb += number_mb_cmds;
1369
1370         gen8_mfc_batchbuffer_emit_object_command(batch,
1371                                                  index,
1372                                                  head_offset,
1373                                                  batchbuffer_offset,
1374                                                  head_size,
1375                                                  tail_size,
1376                                                  number_mb_cmds,
1377                                                  first_object,
1378                                                  last_object,
1379                                                  last_slice,
1380                                                  mb_x,
1381                                                  mb_y,
1382                                                  width_in_mbs,
1383                                                  qp);
1384     }
1385 }
1386                           
1387 /*
1388  * return size in Owords (16bytes)
1389  */         
1390 static int
1391 gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1392                                struct encode_state *encode_state,
1393                                struct intel_encoder_context *encoder_context,
1394                                int slice_index,
1395                                int batchbuffer_offset)
1396 {
1397     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1398     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1399     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1400     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1401     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1402     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1403     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1404     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1405     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1406     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1407     unsigned char *slice_header = NULL;
1408     int slice_header_length_in_bits = 0;
1409     unsigned int tail_data[] = { 0x0, 0x0 };
1410     long head_offset;
1411     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1412     unsigned short head_size, tail_size;
1413     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1414
1415     if (rate_control_mode == VA_RC_CBR) {
1416         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1417         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1418     }
1419
1420     /* only support for 8-bit pixel bit-depth */
1421     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1422     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1423     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1424     assert(qp >= 0 && qp < 52);
1425
1426     head_offset = old_used / 16;
1427     gen8_mfc_avc_slice_state(ctx,
1428                              pPicParameter,
1429                              pSliceParameter,
1430                              encode_state,
1431                              encoder_context,
1432                              (rate_control_mode == VA_RC_CBR),
1433                              qp,
1434                              slice_batch);
1435
1436     if (slice_index == 0)
1437         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1438
1439     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1440
1441     // slice hander
1442     mfc_context->insert_object(ctx,
1443                                encoder_context,
1444                                (unsigned int *)slice_header,
1445                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1446                                slice_header_length_in_bits & 0x1f,
1447                                5,  /* first 5 bytes are start code + nal unit type */
1448                                1,
1449                                0,
1450                                1,
1451                                slice_batch);
1452     free(slice_header);
1453
1454     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1455     used = intel_batchbuffer_used_size(slice_batch);
1456     head_size = (used - old_used) / 16;
1457     old_used = used;
1458
1459     /* tail */
1460     if (last_slice) {    
1461         mfc_context->insert_object(ctx,
1462                                    encoder_context,
1463                                    tail_data,
1464                                    2,
1465                                    8,
1466                                    2,
1467                                    1,
1468                                    1,
1469                                    0,
1470                                    slice_batch);
1471     } else {
1472         mfc_context->insert_object(ctx,
1473                                    encoder_context,
1474                                    tail_data,
1475                                    1,
1476                                    8,
1477                                    1,
1478                                    1,
1479                                    1,
1480                                    0,
1481                                    slice_batch);
1482     }
1483
1484     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1485     used = intel_batchbuffer_used_size(slice_batch);
1486     tail_size = (used - old_used) / 16;
1487
1488    
1489     gen8_mfc_avc_batchbuffer_slice_command(ctx,
1490                                            encoder_context,
1491                                            pSliceParameter,
1492                                            head_offset,
1493                                            head_size,
1494                                            tail_size,
1495                                            batchbuffer_offset,
1496                                            qp,
1497                                            last_slice);
1498
1499     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1500 }
1501
1502 static void
1503 gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1504                                   struct encode_state *encode_state,
1505                                   struct intel_encoder_context *encoder_context)
1506 {
1507     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1508     struct intel_batchbuffer *batch = encoder_context->base.batch;
1509     int i, size, offset = 0;
1510     intel_batchbuffer_start_atomic(batch, 0x4000); 
1511     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1512
1513     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1514         size = gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1515         offset += size;
1516     }
1517
1518     intel_batchbuffer_end_atomic(batch);
1519     intel_batchbuffer_flush(batch);
1520 }
1521
1522 static void
1523 gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1524                                struct encode_state *encode_state,
1525                                struct intel_encoder_context *encoder_context)
1526 {
1527     gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1528     gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1529     gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1530     gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1531 }
1532
1533 static dri_bo *
1534 gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1535                                   struct encode_state *encode_state,
1536                                   struct intel_encoder_context *encoder_context)
1537 {
1538     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1539
1540     gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1541     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1542
1543     return mfc_context->mfc_batchbuffer_surface.bo;
1544 }
1545
1546 #endif
1547
1548 static void
1549 gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
1550                                  struct encode_state *encode_state,
1551                                  struct intel_encoder_context *encoder_context)
1552 {
1553     struct intel_batchbuffer *batch = encoder_context->base.batch;
1554     dri_bo *slice_batch_bo;
1555
1556     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1557         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1558         assert(0);
1559         return; 
1560     }
1561
1562 #ifdef MFC_SOFTWARE_HASWELL
1563     slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1564 #else
1565     slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1566 #endif
1567
1568     // begin programing
1569     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1570     intel_batchbuffer_emit_mi_flush(batch);
1571     
1572     // picture level programing
1573     gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1574
1575     BEGIN_BCS_BATCH(batch, 3);
1576     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1577     OUT_BCS_RELOC(batch,
1578                   slice_batch_bo,
1579                   I915_GEM_DOMAIN_COMMAND, 0, 
1580                   0);
1581     OUT_BCS_BATCH(batch, 0);
1582     ADVANCE_BCS_BATCH(batch);
1583
1584     // end programing
1585     intel_batchbuffer_end_atomic(batch);
1586
1587     dri_bo_unreference(slice_batch_bo);
1588 }
1589
1590
1591 static VAStatus
1592 gen8_mfc_avc_encode_picture(VADriverContextP ctx, 
1593                             struct encode_state *encode_state,
1594                             struct intel_encoder_context *encoder_context)
1595 {
1596     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1597     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1598     int current_frame_bits_size;
1599     int sts;
1600  
1601     for (;;) {
1602         gen8_mfc_init(ctx, encode_state, encoder_context);
1603         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1604         /*Programing bcs pipeline*/
1605         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1606         gen8_mfc_run(ctx, encode_state, encoder_context);
1607         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1608             gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1609             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1610             if (sts == BRC_NO_HRD_VIOLATION) {
1611                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1612                 break;
1613             }
1614             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1615                 if (!mfc_context->hrd.violation_noted) {
1616                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1617                     mfc_context->hrd.violation_noted = 1;
1618                 }
1619                 return VA_STATUS_SUCCESS;
1620             }
1621         } else {
1622             break;
1623         }
1624     }
1625
1626     return VA_STATUS_SUCCESS;
1627 }
1628
1629 /*
1630  * MPEG-2
1631  */
1632
/*
 * Picture coding type written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the VA picture parameter buffer.
 */
static const int
va_to_gen8_mpeg2_picture_type[3] = {
    1,      /* index 0: I picture */
    2,      /* index 1: P picture */
    3       /* index 2: B picture */
};
1639
1640 static void
1641 gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
1642                          struct intel_encoder_context *encoder_context,
1643                          struct encode_state *encode_state)
1644 {
1645     struct intel_batchbuffer *batch = encoder_context->base.batch;
1646     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1647     VAEncPictureParameterBufferMPEG2 *pic_param;
1648     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1649     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1650     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1651
1652     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1653     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1654     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1655
1656     BEGIN_BCS_BATCH(batch, 13);
1657     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1658     OUT_BCS_BATCH(batch,
1659                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1660                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1661                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1662                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1663                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1664                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1665                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1666                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1667                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1668                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1669                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1670                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1671     OUT_BCS_BATCH(batch,
1672                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1673                   va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
1674                   0);
1675     OUT_BCS_BATCH(batch,
1676                   1 << 31 |     /* slice concealment */
1677                   (height_in_mbs - 1) << 16 |
1678                   (width_in_mbs - 1));
1679
1680     if (slice_param && slice_param->quantiser_scale_code >= 14)
1681         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1682     else
1683         OUT_BCS_BATCH(batch, 0);
1684
1685     OUT_BCS_BATCH(batch, 0);
1686     OUT_BCS_BATCH(batch,
1687                   0xFFF << 16 | /* InterMBMaxSize */
1688                   0xFFF << 0 |  /* IntraMBMaxSize */
1689                   0);
1690     OUT_BCS_BATCH(batch, 0);
1691     OUT_BCS_BATCH(batch, 0);
1692     OUT_BCS_BATCH(batch, 0);
1693     OUT_BCS_BATCH(batch, 0);
1694     OUT_BCS_BATCH(batch, 0);
1695     OUT_BCS_BATCH(batch, 0);
1696     ADVANCE_BCS_BATCH(batch);
1697 }
1698
1699 static void
1700 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1701 {
1702     unsigned char intra_qm[64] = {
1703         8, 16, 19, 22, 26, 27, 29, 34,
1704         16, 16, 22, 24, 27, 29, 34, 37,
1705         19, 22, 26, 27, 29, 34, 34, 38,
1706         22, 22, 26, 27, 29, 34, 37, 40,
1707         22, 26, 27, 29, 32, 35, 40, 48,
1708         26, 27, 29, 32, 35, 40, 48, 58,
1709         26, 27, 29, 34, 38, 46, 56, 69,
1710         27, 29, 35, 38, 46, 56, 69, 83
1711     };
1712
1713     unsigned char non_intra_qm[64] = {
1714         16, 16, 16, 16, 16, 16, 16, 16,
1715         16, 16, 16, 16, 16, 16, 16, 16,
1716         16, 16, 16, 16, 16, 16, 16, 16,
1717         16, 16, 16, 16, 16, 16, 16, 16,
1718         16, 16, 16, 16, 16, 16, 16, 16,
1719         16, 16, 16, 16, 16, 16, 16, 16,
1720         16, 16, 16, 16, 16, 16, 16, 16,
1721         16, 16, 16, 16, 16, 16, 16, 16
1722     };
1723
1724     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1725     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1726 }
1727
1728 static void
1729 gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1730 {
1731     unsigned short intra_fqm[64] = {
1732         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1733         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1734         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1735         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1736         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1737         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1738         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1739         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1740     };
1741
1742     unsigned short non_intra_fqm[64] = {
1743         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1744         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1745         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1746         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1747         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1748         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1749         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1750         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1751     };
1752
1753     gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1754     gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1755 }
1756
1757 static void
1758 gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1759                                 struct intel_encoder_context *encoder_context,
1760                                 int x, int y,
1761                                 int next_x, int next_y,
1762                                 int is_fisrt_slice_group,
1763                                 int is_last_slice_group,
1764                                 int intra_slice,
1765                                 int qp,
1766                                 struct intel_batchbuffer *batch)
1767 {
1768     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1769
1770     if (batch == NULL)
1771         batch = encoder_context->base.batch;
1772
1773     BEGIN_BCS_BATCH(batch, 8);
1774
1775     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1776     OUT_BCS_BATCH(batch,
1777                   0 << 31 |                             /* MbRateCtrlFlag */
1778                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1779                   1 << 17 |                             /* Insert Header before the first slice group data */
1780                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1781                   1 << 15 |                             /* TailPresentFlag: always 1 */
1782                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1783                   !!intra_slice << 13 |                 /* IntraSlice */
1784                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1785                   0);
1786     OUT_BCS_BATCH(batch,
1787                   next_y << 24 |
1788                   next_x << 16 |
1789                   y << 8 |
1790                   x << 0 |
1791                   0);
1792     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1793     /* bitstream pointer is only loaded once for the first slice of a frame when 
1794      * LoadSlicePointerFlag is 0
1795      */
1796     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1797     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1798     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1799     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1800
1801     ADVANCE_BCS_BATCH(batch);
1802 }
1803
1804 static int
1805 gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1806                                 struct intel_encoder_context *encoder_context,
1807                                 int x, int y,
1808                                 int first_mb_in_slice,
1809                                 int last_mb_in_slice,
1810                                 int first_mb_in_slice_group,
1811                                 int last_mb_in_slice_group,
1812                                 int mb_type,
1813                                 int qp_scale_code,
1814                                 int coded_block_pattern,
1815                                 unsigned char target_size_in_word,
1816                                 unsigned char max_size_in_word,
1817                                 struct intel_batchbuffer *batch)
1818 {
1819     int len_in_dwords = 9;
1820
1821     if (batch == NULL)
1822         batch = encoder_context->base.batch;
1823
1824     BEGIN_BCS_BATCH(batch, len_in_dwords);
1825
1826     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1827     OUT_BCS_BATCH(batch,
1828                   0 << 24 |     /* PackedMvNum */
1829                   0 << 20 |     /* MvFormat */
1830                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1831                   0 << 15 |     /* TransformFlag: frame DCT */
1832                   0 << 14 |     /* FieldMbFlag */
1833                   1 << 13 |     /* IntraMbFlag */
1834                   mb_type << 8 |   /* MbType: Intra */
1835                   0 << 2 |      /* SkipMbFlag */
1836                   0 << 0 |      /* InterMbMode */
1837                   0);
1838     OUT_BCS_BATCH(batch, y << 16 | x);
1839     OUT_BCS_BATCH(batch,
1840                   max_size_in_word << 24 |
1841                   target_size_in_word << 16 |
1842                   coded_block_pattern << 6 |      /* CBP */
1843                   0);
1844     OUT_BCS_BATCH(batch,
1845                   last_mb_in_slice << 31 |
1846                   first_mb_in_slice << 30 |
1847                   0 << 27 |     /* EnableCoeffClamp */
1848                   last_mb_in_slice_group << 26 |
1849                   0 << 25 |     /* MbSkipConvDisable */
1850                   first_mb_in_slice_group << 24 |
1851                   0 << 16 |     /* MvFieldSelect */
1852                   qp_scale_code << 0 |
1853                   0);
1854     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1855     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1856     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1857     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1858
1859     ADVANCE_BCS_BATCH(batch);
1860
1861     return len_in_dwords;
1862 }
1863
/* Byte offset of the motion vectors from the start of a VME output message */
#define MPEG2_INTER_MV_OFFSET   48

/*
 * Motion vector limits per f_code, indexed by f_code (entry 0 is a
 * placeholder).  Both bounds are in units of 1/2 pixel.
 */
static struct _mv_ranges
{
    int low;    /* smallest allowed value, in 1/2 pixel units */
    int high;   /* largest allowed value, in 1/2 pixel units */
} mv_ranges[] = {
    {0, 0},
    {-16, 15},
    {-32, 31},
    {-64, 63},
    {-128, 127},
    {-256, 255},
    {-512, 511},
    {-1024, 1023},
    {-2048, 2047},
    {-4096, 4095}
};

/*
 * Sanitise one motion vector component.
 *
 * mv is the component in 1/2 pixel units, pos the macroblock index along
 * the same axis and display_max the picture extent along that axis in
 * pixels.  A vector that would move the 16-pixel block outside
 * [0, display_max] is replaced by 0.  For f_code 1..9 the result is then
 * clamped to the matching mv_ranges entry; any other f_code leaves the
 * value unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int block_start = mv + pos * 16 * 2;
    const int block_end = mv + (pos + 1) * 16 * 2;
    int result = mv;

    if (block_start < 0 || block_end > display_max * 2)
        result = 0;

    if (f_code <= 0 || f_code >= 10)
        return result;

    if (result < mv_ranges[f_code].low)
        result = mv_ranges[f_code].low;

    if (result > mv_ranges[f_code].high)
        result = mv_ranges[f_code].high;

    return result;
}
1901
1902 static int
1903 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1904                                 struct encode_state *encode_state,
1905                                 struct intel_encoder_context *encoder_context,
1906                                 unsigned int *msg,
1907                                 int width_in_mbs, int height_in_mbs,
1908                                 int x, int y,
1909                                 int first_mb_in_slice,
1910                                 int last_mb_in_slice,
1911                                 int first_mb_in_slice_group,
1912                                 int last_mb_in_slice_group,
1913                                 int qp_scale_code,
1914                                 unsigned char target_size_in_word,
1915                                 unsigned char max_size_in_word,
1916                                 struct intel_batchbuffer *batch)
1917 {
1918     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1919     int len_in_dwords = 9;
1920     short *mvptr, mvx0, mvy0, mvx1, mvy1;
1921     
1922     if (batch == NULL)
1923         batch = encoder_context->base.batch;
1924
1925     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
1926     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1927     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1928     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1929     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1930
1931     BEGIN_BCS_BATCH(batch, len_in_dwords);
1932
1933     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1934     OUT_BCS_BATCH(batch,
1935                   2 << 24 |     /* PackedMvNum */
1936                   7 << 20 |     /* MvFormat */
1937                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1938                   0 << 15 |     /* TransformFlag: frame DCT */
1939                   0 << 14 |     /* FieldMbFlag */
1940                   0 << 13 |     /* IntraMbFlag */
1941                   1 << 8 |      /* MbType: Frame-based */
1942                   0 << 2 |      /* SkipMbFlag */
1943                   0 << 0 |      /* InterMbMode */
1944                   0);
1945     OUT_BCS_BATCH(batch, y << 16 | x);
1946     OUT_BCS_BATCH(batch,
1947                   max_size_in_word << 24 |
1948                   target_size_in_word << 16 |
1949                   0x3f << 6 |   /* CBP */
1950                   0);
1951     OUT_BCS_BATCH(batch,
1952                   last_mb_in_slice << 31 |
1953                   first_mb_in_slice << 30 |
1954                   0 << 27 |     /* EnableCoeffClamp */
1955                   last_mb_in_slice_group << 26 |
1956                   0 << 25 |     /* MbSkipConvDisable */
1957                   first_mb_in_slice_group << 24 |
1958                   0 << 16 |     /* MvFieldSelect */
1959                   qp_scale_code << 0 |
1960                   0);
1961
1962     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
1963     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
1964     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1965     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1966
1967     ADVANCE_BCS_BATCH(batch);
1968
1969     return len_in_dwords;
1970 }
1971
1972 static void
1973 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1974                                            struct encode_state *encode_state,
1975                                            struct intel_encoder_context *encoder_context,
1976                                            struct intel_batchbuffer *slice_batch)
1977 {
1978     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1979     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1980
1981     if (encode_state->packed_header_data[idx]) {
1982         VAEncPackedHeaderParameterBuffer *param = NULL;
1983         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1984         unsigned int length_in_bits;
1985
1986         assert(encode_state->packed_header_param[idx]);
1987         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1988         length_in_bits = param->bit_length;
1989
1990         mfc_context->insert_object(ctx,
1991                                    encoder_context,
1992                                    header_data,
1993                                    ALIGN(length_in_bits, 32) >> 5,
1994                                    length_in_bits & 0x1f,
1995                                    5,   /* FIXME: check it */
1996                                    0,
1997                                    0,
1998                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
1999                                    slice_batch);
2000     }
2001
2002     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2003
2004     if (encode_state->packed_header_data[idx]) {
2005         VAEncPackedHeaderParameterBuffer *param = NULL;
2006         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2007         unsigned int length_in_bits;
2008
2009         assert(encode_state->packed_header_param[idx]);
2010         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2011         length_in_bits = param->bit_length;
2012
2013         mfc_context->insert_object(ctx,
2014                                    encoder_context,
2015                                    header_data,
2016                                    ALIGN(length_in_bits, 32) >> 5,
2017                                    length_in_bits & 0x1f,
2018                                    5,   /* FIXME: check it */
2019                                    0,
2020                                    0,
2021                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2022                                    slice_batch);
2023     }
2024 }
2025
2026 static void 
2027 gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2028                                     struct encode_state *encode_state,
2029                                     struct intel_encoder_context *encoder_context,
2030                                     int slice_index,
2031                                     VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2032                                     struct intel_batchbuffer *slice_batch)
2033 {
2034     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2035     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2036     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2037     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2038     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2039     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2040     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2041     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2042     int i, j;
2043     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2044     unsigned int *msg = NULL;
2045     unsigned char *msg_ptr = NULL;
2046
2047     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2048     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2049     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2050     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2051
2052     dri_bo_map(vme_context->vme_output.bo , 0);
2053     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2054
2055     if (next_slice_group_param) {
2056         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2057         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2058     } else {
2059         h_next_start_pos = 0;
2060         v_next_start_pos = height_in_mbs;
2061     }
2062
2063     gen8_mfc_mpeg2_slicegroup_state(ctx,
2064                                     encoder_context,
2065                                     h_start_pos,
2066                                     v_start_pos,
2067                                     h_next_start_pos,
2068                                     v_next_start_pos,
2069                                     slice_index == 0,
2070                                     next_slice_group_param == NULL,
2071                                     slice_param->is_intra_slice,
2072                                     slice_param->quantiser_scale_code,
2073                                     slice_batch);
2074
2075     if (slice_index == 0) 
2076         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2077
2078     /* Insert '00' to make sure the header is valid */
2079     mfc_context->insert_object(ctx,
2080                                encoder_context,
2081                                (unsigned int*)section_delimiter,
2082                                1,
2083                                8,   /* 8bits in the last DWORD */
2084                                1,   /* 1 byte */
2085                                1,
2086                                0,
2087                                0,
2088                                slice_batch);
2089
2090     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2091         /* PAK for each macroblocks */
2092         for (j = 0; j < slice_param->num_macroblocks; j++) {
2093             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2094             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2095             int first_mb_in_slice = (j == 0);
2096             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2097             int first_mb_in_slice_group = (i == 0 && j == 0);
2098             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2099                                           j == slice_param->num_macroblocks - 1);
2100
2101             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2102
2103             if (slice_param->is_intra_slice) {
2104                 gen8_mfc_mpeg2_pak_object_intra(ctx,
2105                                                 encoder_context,
2106                                                 h_pos, v_pos,
2107                                                 first_mb_in_slice,
2108                                                 last_mb_in_slice,
2109                                                 first_mb_in_slice_group,
2110                                                 last_mb_in_slice_group,
2111                                                 0x1a,
2112                                                 slice_param->quantiser_scale_code,
2113                                                 0x3f,
2114                                                 0,
2115                                                 0xff,
2116                                                 slice_batch);
2117             } else {
2118                 gen8_mfc_mpeg2_pak_object_inter(ctx,
2119                                                 encode_state,
2120                                                 encoder_context,
2121                                                 msg,
2122                                                 width_in_mbs, height_in_mbs,
2123                                                 h_pos, v_pos,
2124                                                 first_mb_in_slice,
2125                                                 last_mb_in_slice,
2126                                                 first_mb_in_slice_group,
2127                                                 last_mb_in_slice_group,
2128                                                 slice_param->quantiser_scale_code,
2129                                                 0,
2130                                                 0xff,
2131                                                 slice_batch);
2132             }
2133         }
2134
2135         slice_param++;
2136     }
2137
2138     dri_bo_unmap(vme_context->vme_output.bo);
2139
2140     /* tail data */
2141     if (next_slice_group_param == NULL) { /* end of a picture */
2142         mfc_context->insert_object(ctx,
2143                                    encoder_context,
2144                                    (unsigned int *)tail_delimiter,
2145                                    2,
2146                                    8,   /* 8bits in the last DWORD */
2147                                    5,   /* 5 bytes */
2148                                    1,
2149                                    1,
2150                                    0,
2151                                    slice_batch);
2152     } else {        /* end of a lsice group */
2153         mfc_context->insert_object(ctx,
2154                                    encoder_context,
2155                                    (unsigned int *)section_delimiter,
2156                                    1,
2157                                    8,   /* 8bits in the last DWORD */
2158                                    1,   /* 1 byte */
2159                                    1,
2160                                    1,
2161                                    0,
2162                                    slice_batch);
2163     }
2164 }
2165
2166 /* 
2167  * A batch buffer for all slices, including slice state, 
2168  * slice insert object and slice pak object commands
2169  *
2170  */
2171 static dri_bo *
2172 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2173                                           struct encode_state *encode_state,
2174                                           struct intel_encoder_context *encoder_context)
2175 {
2176     struct i965_driver_data *i965 = i965_driver_data(ctx);
2177     struct intel_batchbuffer *batch;
2178     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2179     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2180     dri_bo *batch_bo;
2181     int i;
2182     int buffer_size;
2183     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2184     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2185
2186     buffer_size = width_in_mbs * height_in_mbs * 64;
2187     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2188     batch_bo = batch->buffer;
2189
2190     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2191         if (i == encode_state->num_slice_params_ext - 1)
2192             next_slice_group_param = NULL;
2193         else
2194             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2195
2196         gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2197     }
2198
2199     intel_batchbuffer_align(batch, 8);
2200     
2201     BEGIN_BCS_BATCH(batch, 2);
2202     OUT_BCS_BATCH(batch, 0);
2203     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2204     ADVANCE_BCS_BATCH(batch);
2205
2206     dri_bo_reference(batch_bo);
2207     intel_batchbuffer_free(batch);
2208
2209     return batch_bo;
2210 }
2211
2212 static void
2213 gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2214                                            struct encode_state *encode_state,
2215                                            struct intel_encoder_context *encoder_context)
2216 {
2217     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2218
2219     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2220     mfc_context->set_surface_state(ctx, encoder_context);
2221     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2222     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
2223     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2224     gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2225     gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
2226     gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
2227 }
2228
2229 static void
2230 gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2231                                    struct encode_state *encode_state,
2232                                    struct intel_encoder_context *encoder_context)
2233 {
2234     struct intel_batchbuffer *batch = encoder_context->base.batch;
2235     dri_bo *slice_batch_bo;
2236
2237     slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2238
2239     // begin programing
2240     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2241     intel_batchbuffer_emit_mi_flush(batch);
2242     
2243     // picture level programing
2244     gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2245
2246     BEGIN_BCS_BATCH(batch, 4);
2247     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2248     OUT_BCS_RELOC(batch,
2249                   slice_batch_bo,
2250                   I915_GEM_DOMAIN_COMMAND, 0, 
2251                   0);
2252     OUT_BCS_BATCH(batch, 0);
2253     OUT_BCS_BATCH(batch, 0);
2254     ADVANCE_BCS_BATCH(batch);
2255
2256     // end programing
2257     intel_batchbuffer_end_atomic(batch);
2258
2259     dri_bo_unreference(slice_batch_bo);
2260 }
2261
2262 static VAStatus
2263 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2264                         struct encode_state *encode_state,
2265                         struct intel_encoder_context *encoder_context)
2266 {
2267     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2268     struct object_surface *obj_surface; 
2269     struct object_buffer *obj_buffer;
2270     struct i965_coded_buffer_segment *coded_buffer_segment;
2271     VAStatus vaStatus = VA_STATUS_SUCCESS;
2272     dri_bo *bo;
2273     int i;
2274
2275     /* reconstructed surface */
2276     obj_surface = encode_state->reconstructed_object;
2277     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2278     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2279     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2280     mfc_context->surface_state.width = obj_surface->orig_width;
2281     mfc_context->surface_state.height = obj_surface->orig_height;
2282     mfc_context->surface_state.w_pitch = obj_surface->width;
2283     mfc_context->surface_state.h_pitch = obj_surface->height;
2284
2285     /* forward reference */
2286     obj_surface = encode_state->reference_objects[0];
2287
2288     if (obj_surface && obj_surface->bo) {
2289         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2290         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2291     } else
2292         mfc_context->reference_surfaces[0].bo = NULL;
2293
2294     /* backward reference */
2295     obj_surface = encode_state->reference_objects[1];
2296
2297     if (obj_surface && obj_surface->bo) {
2298         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2299         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2300     } else {
2301         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2302
2303         if (mfc_context->reference_surfaces[1].bo)
2304             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2305     }
2306
2307     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2308         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2309
2310         if (mfc_context->reference_surfaces[i].bo)
2311             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2312     }
2313     
2314     /* input YUV surface */
2315     obj_surface = encode_state->input_yuv_object;
2316     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2317     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2318
2319     /* coded buffer */
2320     obj_buffer = encode_state->coded_buf_object;
2321     bo = obj_buffer->buffer_store->bo;
2322     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2323     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2324     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2325     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2326
2327     /* set the internal flag to 0 to indicate the coded size is unknown */
2328     dri_bo_map(bo, 1);
2329     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2330     coded_buffer_segment->mapped = 0;
2331     coded_buffer_segment->codec = encoder_context->codec;
2332     dri_bo_unmap(bo);
2333
2334     return vaStatus;
2335 }
2336
2337 static VAStatus
2338 gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2339                               struct encode_state *encode_state,
2340                               struct intel_encoder_context *encoder_context)
2341 {
2342     gen8_mfc_init(ctx, encode_state, encoder_context);
2343     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2344     /*Programing bcs pipeline*/
2345     gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2346     gen8_mfc_run(ctx, encode_state, encoder_context);
2347
2348     return VA_STATUS_SUCCESS;
2349 }
2350
2351 static void
2352 gen8_mfc_context_destroy(void *context)
2353 {
2354     struct gen6_mfc_context *mfc_context = context;
2355     int i;
2356
2357     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2358     mfc_context->post_deblocking_output.bo = NULL;
2359
2360     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2361     mfc_context->pre_deblocking_output.bo = NULL;
2362
2363     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2364     mfc_context->uncompressed_picture_source.bo = NULL;
2365
2366     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2367     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2368
2369     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2370         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2371         mfc_context->direct_mv_buffers[i].bo = NULL;
2372     }
2373
2374     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2375     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2376
2377     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2378     mfc_context->macroblock_status_buffer.bo = NULL;
2379
2380     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2381     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2382
2383     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2384     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2385
2386
2387     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2388         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2389         mfc_context->reference_surfaces[i].bo = NULL;  
2390     }
2391
2392     i965_gpe_context_destroy(&mfc_context->gpe_context);
2393
2394     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2395     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2396
2397     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2398     mfc_context->aux_batchbuffer_surface.bo = NULL;
2399
2400     if (mfc_context->aux_batchbuffer)
2401         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2402
2403     mfc_context->aux_batchbuffer = NULL;
2404
2405     free(mfc_context);
2406 }
2407
2408 static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
2409                                   VAProfile profile,
2410                                   struct encode_state *encode_state,
2411                                   struct intel_encoder_context *encoder_context)
2412 {
2413     VAStatus vaStatus;
2414
2415     switch (profile) {
2416     case VAProfileH264Baseline:
2417     case VAProfileH264Main:
2418     case VAProfileH264High:
2419         vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2420         break;
2421
2422         /* FIXME: add for other profile */
2423     case VAProfileMPEG2Simple:
2424     case VAProfileMPEG2Main:
2425         vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2426         break;
2427
2428     default:
2429         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2430         break;
2431     }
2432
2433     return vaStatus;
2434 }
2435
2436 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2437 {
2438     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2439
2440     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2441
2442     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2443     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2444
2445     mfc_context->gpe_context.curbe.length = 32 * 4;
2446
2447     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2448     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2449     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2450     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2451     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2452
2453     i965_gpe_load_kernels(ctx,
2454                           &mfc_context->gpe_context,
2455                           gen8_mfc_kernels,
2456                           NUM_MFC_KERNEL);
2457
2458     mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
2459     mfc_context->set_surface_state = gen8_mfc_surface_state;
2460     mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
2461     mfc_context->avc_img_state = gen8_mfc_avc_img_state;
2462     mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
2463     mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
2464     mfc_context->insert_object = gen8_mfc_avc_insert_object;
2465     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2466
2467     encoder_context->mfc_context = mfc_context;
2468     encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
2469     encoder_context->mfc_pipeline = gen8_mfc_pipeline;
2470     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2471
2472     return True;
2473 }