VPP: Fix Coverity alert on uninitialized vpp_kernels
[platform/upstream/libva-intel-driver.git] / src / gen8_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* Size of one padded surface-state slot; this file uses the Gen8 value. */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
/* Byte offset of surface-state slot `index`. The argument is parenthesized so
 * that expression arguments such as `i + 1` expand correctly. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Byte offset of binding-table entry `index`; the table starts right after
 * MAX_MEDIA_SURFACES_GEN6 surface-state slots. */
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define MFC_SOFTWARE_HASWELL    1

/* Revision value from which IS_STEPPING_BPLUS() reports true. */
#define B0_STEP_REV             2
/* True when the device revision is at least B0_STEP_REV. The argument is
 * parenthesized so any pointer expression (e.g. `&dev`) can be passed. */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
54
/* Binary for the AVC intra batchbuffer kernel. The rows (four 32-bit words
 * each) come verbatim from the included .g7b file. */
static const uint32_t gen8_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
};

/* Binary for the AVC inter batchbuffer kernel, same layout as above. */
static const uint32_t gen8_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
};
62
/*
 * Table of the MFC batchbuffer kernels, one entry per shader binary above.
 * Each entry is a positional initializer of struct i965_kernel; the fields
 * appear to be: name, kernel id, binary, binary size, and a trailing pointer
 * left NULL here -- NOTE(review): confirm against the struct definition.
 */
static struct i965_kernel gen8_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen8_mfc_batchbuffer_avc_intra,
        sizeof(gen8_mfc_batchbuffer_avc_intra),
        NULL
    },

    {
        "MFC AVC INTER BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTER,
        gen8_mfc_batchbuffer_avc_inter,
        sizeof(gen8_mfc_batchbuffer_avc_inter),
        NULL
    },
};
80
/* Inter prediction mode field: a two-bit mask and three of its values.
 * NOTE(review): presumably decoded from the VME output message; the users
 * are not visible in this part of the file -- confirm. */
#define         INTER_MODE_MASK         0x03
#define         INTER_8X8               0x03
#define         INTER_16X8              0x01
#define         INTER_8X16              0x02
/* Mask covering bits 8..15 (sub-macroblock shape, judging by the name). */
#define         SUBMB_SHAPE_MASK        0x00FF00

/* Values placed at bit 20 and up; names suggest MV count selectors
 * (TODO confirm against the MFC PAK object layout). */
#define         INTER_MV8               (4 << 20)
#define         INTER_MV32              (6 << 20)
89
90
91 static void
92 gen8_mfc_pipe_mode_select(VADriverContextP ctx,
93                           int standard_select,
94                           struct intel_encoder_context *encoder_context)
95 {
96     struct intel_batchbuffer *batch = encoder_context->base.batch;
97     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
98
99     assert(standard_select == MFX_FORMAT_MPEG2 ||
100            standard_select == MFX_FORMAT_AVC);
101
102     BEGIN_BCS_BATCH(batch, 5);
103
104     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
105     OUT_BCS_BATCH(batch,
106                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
107                   (MFD_MODE_VLD << 15) | /* VLD mode */
108                   (0 << 10) | /* Stream-Out Enable */
109                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
110                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
111                   (0 << 5)  | /* not in stitch mode */
112                   (1 << 4)  | /* encoding mode */
113                   (standard_select << 0));  /* standard select: avc or mpeg2 */
114     OUT_BCS_BATCH(batch,
115                   (0 << 7)  | /* expand NOA bus flag */
116                   (0 << 6)  | /* disable slice-level clock gating */
117                   (0 << 5)  | /* disable clock gating for NOA */
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0);
124     OUT_BCS_BATCH(batch, 0);
125
126     ADVANCE_BCS_BATCH(batch);
127 }
128
129 static void
130 gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
131 {
132     struct intel_batchbuffer *batch = encoder_context->base.batch;
133     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
134
135     BEGIN_BCS_BATCH(batch, 6);
136
137     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
138     OUT_BCS_BATCH(batch, 0);
139     OUT_BCS_BATCH(batch,
140                   ((mfc_context->surface_state.height - 1) << 18) |
141                   ((mfc_context->surface_state.width - 1) << 4));
142     OUT_BCS_BATCH(batch,
143                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
144                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
145                   (0 << 22) | /* surface object control state, FIXME??? */
146                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
147                   (0 << 2)  | /* must be 0 for interleave U/V */
148                   (1 << 1)  | /* must be tiled */
149                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
150     OUT_BCS_BATCH(batch,
151                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
152                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
153     OUT_BCS_BATCH(batch, 0);
154
155     ADVANCE_BCS_BATCH(batch);
156 }
157
158 static void
159 gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
160                                  struct intel_encoder_context *encoder_context)
161 {
162     struct intel_batchbuffer *batch = encoder_context->base.batch;
163     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
164     struct gen6_vme_context *vme_context = encoder_context->vme_context;
165     int vme_size;
166
167     BEGIN_BCS_BATCH(batch, 26);
168
169     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
170     /* the DW1-3 is for the MFX indirect bistream offset */
171     OUT_BCS_BATCH(batch, 0);
172     OUT_BCS_BATCH(batch, 0);
173     OUT_BCS_BATCH(batch, 0);
174     /* the DW4-5 is the MFX upper bound */
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177
178     vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
179     /* the DW6-10 is for MFX Indirect MV Object Base Address */
180     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
184     OUT_BCS_BATCH(batch, 0);
185
186     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198     OUT_BCS_BATCH(batch, 0);
199
200     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   0);
205     OUT_BCS_BATCH(batch, 0);
206     OUT_BCS_BATCH(batch, 0);
207         
208     OUT_BCS_RELOC(batch,
209                   mfc_context->mfc_indirect_pak_bse_object.bo,
210                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
211                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
212     OUT_BCS_BATCH(batch, 0);
213
214     ADVANCE_BCS_BATCH(batch);
215 }
216
217 static void
218 gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
219                        struct intel_encoder_context *encoder_context)
220 {
221     struct intel_batchbuffer *batch = encoder_context->base.batch;
222     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
223     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
224
225     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
226     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
227
228     BEGIN_BCS_BATCH(batch, 16);
229
230     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
231     /*DW1. MB setting of frame */
232     OUT_BCS_BATCH(batch,
233                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
234     OUT_BCS_BATCH(batch, 
235                   ((height_in_mbs - 1) << 16) | 
236                   ((width_in_mbs - 1) << 0));
237     /* DW3 QP setting */
238     OUT_BCS_BATCH(batch, 
239                   (0 << 24) |   /* Second Chroma QP Offset */
240                   (0 << 16) |   /* Chroma QP Offset */
241                   (0 << 14) |   /* Max-bit conformance Intra flag */
242                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
243                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
244                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
245                   (0 << 8)  |   /* FIXME: Image Structure */
246                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
247     OUT_BCS_BATCH(batch,
248                   (0 << 16) |   /* Mininum Frame size */
249                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
250                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
251                   (0 << 13) |   /* CABAC 0 word insertion test enable */
252                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
253                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
254                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
255                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
256                   (0 << 6)  |   /* Only valid for VLD decoding mode */
257                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
258                   (0 << 4)  |   /* Direct 8x8 inference flag */
259                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
260                   (1 << 2)  |   /* Frame MB only flag */
261                   (0 << 1)  |   /* MBAFF mode is in active */
262                   (0 << 0));    /* Field picture flag */
263     /* DW5 Trellis quantization */
264     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
265     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
266                   (0xBB8 << 16) |       /* InterMbMaxSz */
267                   (0xEE8) );            /* IntraMbMaxSz */
268     OUT_BCS_BATCH(batch, 0);            /* Reserved */
269     /* DW8. QP delta */
270     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
271     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
272     /* DW10. Bit setting for MB */
273     OUT_BCS_BATCH(batch, 0x8C000000);
274     OUT_BCS_BATCH(batch, 0x00010000);
275     /* DW12. */
276     OUT_BCS_BATCH(batch, 0);
277     OUT_BCS_BATCH(batch, 0x02010100);
278     /* DW14. For short format */
279     OUT_BCS_BATCH(batch, 0);
280     OUT_BCS_BATCH(batch, 0);
281
282     ADVANCE_BCS_BATCH(batch);
283 }
284
285 static void
286 gen8_mfc_qm_state(VADriverContextP ctx,
287                   int qm_type,
288                   unsigned int *qm,
289                   int qm_length,
290                   struct intel_encoder_context *encoder_context)
291 {
292     struct intel_batchbuffer *batch = encoder_context->base.batch;
293     unsigned int qm_buffer[16];
294
295     assert(qm_length <= 16);
296     assert(sizeof(*qm) == 4);
297     memcpy(qm_buffer, qm, qm_length * 4);
298
299     BEGIN_BCS_BATCH(batch, 18);
300     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
301     OUT_BCS_BATCH(batch, qm_type << 0);
302     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
303     ADVANCE_BCS_BATCH(batch);
304 }
305
306 static void
307 gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
308 {
309     unsigned int qm[16] = {
310         0x10101010, 0x10101010, 0x10101010, 0x10101010,
311         0x10101010, 0x10101010, 0x10101010, 0x10101010,
312         0x10101010, 0x10101010, 0x10101010, 0x10101010,
313         0x10101010, 0x10101010, 0x10101010, 0x10101010
314     };
315
316     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
317     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
318     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
319     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
320 }
321
322 static void
323 gen8_mfc_fqm_state(VADriverContextP ctx,
324                    int fqm_type,
325                    unsigned int *fqm,
326                    int fqm_length,
327                    struct intel_encoder_context *encoder_context)
328 {
329     struct intel_batchbuffer *batch = encoder_context->base.batch;
330     unsigned int fqm_buffer[32];
331
332     assert(fqm_length <= 32);
333     assert(sizeof(*fqm) == 4);
334     memcpy(fqm_buffer, fqm, fqm_length * 4);
335
336     BEGIN_BCS_BATCH(batch, 34);
337     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
338     OUT_BCS_BATCH(batch, fqm_type << 0);
339     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
340     ADVANCE_BCS_BATCH(batch);
341 }
342
343 static void
344 gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
345 {
346     unsigned int qm[32] = {
347         0x10001000, 0x10001000, 0x10001000, 0x10001000,
348         0x10001000, 0x10001000, 0x10001000, 0x10001000,
349         0x10001000, 0x10001000, 0x10001000, 0x10001000,
350         0x10001000, 0x10001000, 0x10001000, 0x10001000,
351         0x10001000, 0x10001000, 0x10001000, 0x10001000,
352         0x10001000, 0x10001000, 0x10001000, 0x10001000,
353         0x10001000, 0x10001000, 0x10001000, 0x10001000,
354         0x10001000, 0x10001000, 0x10001000, 0x10001000
355     };
356
357     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
358     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
359     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
360     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
361 }
362
363 static void
364 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
365                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
366                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
367                            struct intel_batchbuffer *batch)
368 {
369     if (batch == NULL)
370         batch = encoder_context->base.batch;
371
372     if (data_bits_in_last_dw == 0)
373         data_bits_in_last_dw = 32;
374
375     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
376
377     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
378     OUT_BCS_BATCH(batch,
379                   (0 << 16) |   /* always start at offset 0 */
380                   (data_bits_in_last_dw << 8) |
381                   (skip_emul_byte_count << 4) |
382                   (!!emulation_flag << 3) |
383                   ((!!is_last_header) << 2) |
384                   ((!!is_end_of_slice) << 1) |
385                   (0 << 0));    /* FIXME: ??? */
386     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
387
388     ADVANCE_BCS_BATCH(batch);
389 }
390
391
392 static void gen8_mfc_init(VADriverContextP ctx,
393                           struct encode_state *encode_state,
394                           struct intel_encoder_context *encoder_context)
395 {
396     struct i965_driver_data *i965 = i965_driver_data(ctx);
397     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
398     dri_bo *bo;
399     int i;
400     int width_in_mbs = 0;
401     int height_in_mbs = 0;
402     int slice_batchbuffer_size;
403
404     if (encoder_context->codec == CODEC_H264 ||
405         encoder_context->codec == CODEC_H264_MVC) {
406         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
407         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
408         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
409     } else {
410         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
411
412         assert(encoder_context->codec == CODEC_MPEG2);
413
414         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
415         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
416     }
417
418     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
419                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
420
421     /*Encode common setup for MFC*/
422     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
423     mfc_context->post_deblocking_output.bo = NULL;
424
425     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
426     mfc_context->pre_deblocking_output.bo = NULL;
427
428     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
429     mfc_context->uncompressed_picture_source.bo = NULL;
430
431     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
432     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
433
434     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
435         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
436         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
437         mfc_context->direct_mv_buffers[i].bo = NULL;
438     }
439
440     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
441         if (mfc_context->reference_surfaces[i].bo != NULL)
442             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
443         mfc_context->reference_surfaces[i].bo = NULL;  
444     }
445
446     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
447     bo = dri_bo_alloc(i965->intel.bufmgr,
448                       "Buffer",
449                       width_in_mbs * 64,
450                       64);
451     assert(bo);
452     mfc_context->intra_row_store_scratch_buffer.bo = bo;
453
454     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
455     bo = dri_bo_alloc(i965->intel.bufmgr,
456                       "Buffer",
457                       width_in_mbs * height_in_mbs * 16,
458                       64);
459     assert(bo);
460     mfc_context->macroblock_status_buffer.bo = bo;
461
462     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
463     bo = dri_bo_alloc(i965->intel.bufmgr,
464                       "Buffer",
465                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
466                       64);
467     assert(bo);
468     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
469
470     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
471     bo = dri_bo_alloc(i965->intel.bufmgr,
472                       "Buffer",
473                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
474                       0x1000);
475     assert(bo);
476     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
477
478     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
479     mfc_context->mfc_batchbuffer_surface.bo = NULL;
480
481     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
482     mfc_context->aux_batchbuffer_surface.bo = NULL;
483
484     if (mfc_context->aux_batchbuffer)
485         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
486
487     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
488     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
489     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
490     mfc_context->aux_batchbuffer_surface.pitch = 16;
491     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
492     mfc_context->aux_batchbuffer_surface.size_block = 16;
493
494     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
495 }
496
497 static void
498 gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
499                              struct intel_encoder_context *encoder_context)
500 {
501     struct intel_batchbuffer *batch = encoder_context->base.batch;
502     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
503     int i;
504
505     BEGIN_BCS_BATCH(batch, 61);
506
507     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
508
509     /* the DW1-3 is for pre_deblocking */
510     if (mfc_context->pre_deblocking_output.bo)
511         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
512                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
513                       0);
514     else
515         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
516
517     OUT_BCS_BATCH(batch, 0);
518     OUT_BCS_BATCH(batch, 0);
519     /* the DW4-6 is for the post_deblocking */
520
521     if (mfc_context->post_deblocking_output.bo)
522         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
523                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
524                       0);                                                                                       /* post output addr  */ 
525     else
526         OUT_BCS_BATCH(batch, 0);
527     
528     OUT_BCS_BATCH(batch, 0);
529     OUT_BCS_BATCH(batch, 0);
530
531     /* the DW7-9 is for the uncompressed_picture */
532     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
533                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
534                   0); /* uncompressed data */
535
536     OUT_BCS_BATCH(batch, 0);
537     OUT_BCS_BATCH(batch, 0);
538
539     /* the DW10-12 is for the mb status */
540     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
541                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
542                   0); /* StreamOut data*/
543     
544     OUT_BCS_BATCH(batch, 0);
545     OUT_BCS_BATCH(batch, 0);
546
547     /* the DW13-15 is for the intra_row_store_scratch */
548     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
549                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
550                   0);   
551
552     OUT_BCS_BATCH(batch, 0);
553     OUT_BCS_BATCH(batch, 0);
554
555     /* the DW16-18 is for the deblocking filter */
556     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
557                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
558                   0);
559
560     OUT_BCS_BATCH(batch, 0);
561     OUT_BCS_BATCH(batch, 0);
562
563     /* the DW 19-50 is for Reference pictures*/
564     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
565         if ( mfc_context->reference_surfaces[i].bo != NULL) {
566             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
567                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
568                           0);                   
569         } else {
570             OUT_BCS_BATCH(batch, 0);
571         }
572
573         OUT_BCS_BATCH(batch, 0);
574     }
575
576     OUT_BCS_BATCH(batch, 0);
577
578     /* The DW 52-54 is for the MB status buffer */
579     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
580                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
581                   0);                                                                                   /* Macroblock status buffer*/
582         
583     OUT_BCS_BATCH(batch, 0);
584     OUT_BCS_BATCH(batch, 0);
585
586     /* the DW 55-57 is the ILDB buffer */
587     OUT_BCS_BATCH(batch, 0);
588     OUT_BCS_BATCH(batch, 0);
589     OUT_BCS_BATCH(batch, 0);
590
591     /* the DW 58-60 is the second ILDB buffer */
592     OUT_BCS_BATCH(batch, 0);
593     OUT_BCS_BATCH(batch, 0);
594     OUT_BCS_BATCH(batch, 0);
595
596     ADVANCE_BCS_BATCH(batch);
597 }
598
599 static void
600 gen8_mfc_avc_directmode_state(VADriverContextP ctx,
601                               struct intel_encoder_context *encoder_context)
602 {
603     struct intel_batchbuffer *batch = encoder_context->base.batch;
604     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
605
606     int i;
607
608     BEGIN_BCS_BATCH(batch, 71);
609
610     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
611
612     /* Reference frames and Current frames */
613     /* the DW1-32 is for the direct MV for reference */
614     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
615         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
616             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
617                           I915_GEM_DOMAIN_INSTRUCTION, 0,
618                           0);
619             OUT_BCS_BATCH(batch, 0);
620         } else {
621             OUT_BCS_BATCH(batch, 0);
622             OUT_BCS_BATCH(batch, 0);
623         }
624     }
625     
626     OUT_BCS_BATCH(batch, 0);
627
628     /* the DW34-36 is the MV for the current reference */
629     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
630                   I915_GEM_DOMAIN_INSTRUCTION, 0,
631                   0);
632
633     OUT_BCS_BATCH(batch, 0);
634     OUT_BCS_BATCH(batch, 0);
635
636     /* POL list */
637     for(i = 0; i < 32; i++) {
638         OUT_BCS_BATCH(batch, i/2);
639     }
640     OUT_BCS_BATCH(batch, 0);
641     OUT_BCS_BATCH(batch, 0);
642
643     ADVANCE_BCS_BATCH(batch);
644 }
645
646
647 static void
648 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
649                                  struct intel_encoder_context *encoder_context)
650 {
651     struct intel_batchbuffer *batch = encoder_context->base.batch;
652     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
653
654     BEGIN_BCS_BATCH(batch, 10);
655
656     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
657     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
658                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
659                   0);
660     OUT_BCS_BATCH(batch, 0);
661     OUT_BCS_BATCH(batch, 0);
662         
663     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
664     OUT_BCS_BATCH(batch, 0);
665     OUT_BCS_BATCH(batch, 0);
666     OUT_BCS_BATCH(batch, 0);
667
668     /* the DW7-9 is for Bitplane Read Buffer Base Address */
669     OUT_BCS_BATCH(batch, 0);
670     OUT_BCS_BATCH(batch, 0);
671     OUT_BCS_BATCH(batch, 0);
672
673     ADVANCE_BCS_BATCH(batch);
674 }
675
676
677 static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
678                                                       struct encode_state *encode_state,
679                                                       struct intel_encoder_context *encoder_context)
680 {
681     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
682
683     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
684     mfc_context->set_surface_state(ctx, encoder_context);
685     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
686     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
687     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
688     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
689     mfc_context->avc_qm_state(ctx, encoder_context);
690     mfc_context->avc_fqm_state(ctx, encoder_context);
691     gen8_mfc_avc_directmode_state(ctx, encoder_context); 
692     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
693 }
694
695
696 static VAStatus gen8_mfc_run(VADriverContextP ctx, 
697                              struct encode_state *encode_state,
698                              struct intel_encoder_context *encoder_context)
699 {
700     struct intel_batchbuffer *batch = encoder_context->base.batch;
701
702     intel_batchbuffer_flush(batch);             //run the pipeline
703
704     return VA_STATUS_SUCCESS;
705 }
706
707
708 static VAStatus
709 gen8_mfc_stop(VADriverContextP ctx, 
710               struct encode_state *encode_state,
711               struct intel_encoder_context *encoder_context,
712               int *encoded_bits_size)
713 {
714     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
715     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
716     VACodedBufferSegment *coded_buffer_segment;
717     
718     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
719     assert(vaStatus == VA_STATUS_SUCCESS);
720     *encoded_bits_size = coded_buffer_segment->size * 8;
721     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
722
723     return VA_STATUS_SUCCESS;
724 }
725
726
727 static void
728 gen8_mfc_avc_slice_state(VADriverContextP ctx,
729                          VAEncPictureParameterBufferH264 *pic_param,
730                          VAEncSliceParameterBufferH264 *slice_param,
731                          struct encode_state *encode_state,
732                          struct intel_encoder_context *encoder_context,
733                          int rate_control_enable,
734                          int qp,
735                          struct intel_batchbuffer *batch)
736 {
737     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
738     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
739     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
740     int beginmb = slice_param->macroblock_address;
741     int endmb = beginmb + slice_param->num_macroblocks;
742     int beginx = beginmb % width_in_mbs;
743     int beginy = beginmb / width_in_mbs;
744     int nextx =  endmb % width_in_mbs;
745     int nexty = endmb / width_in_mbs;
746     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
747     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
748     int maxQpN, maxQpP;
749     unsigned char correct[6], grow, shrink;
750     int i;
751     int weighted_pred_idc = 0;
752     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
753     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
754     int num_ref_l0 = 0, num_ref_l1 = 0;
755
756     if (batch == NULL)
757         batch = encoder_context->base.batch;
758
759     if (slice_type == SLICE_TYPE_I) {
760         luma_log2_weight_denom = 0;
761         chroma_log2_weight_denom = 0;
762     } else if (slice_type == SLICE_TYPE_P) {
763         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
764         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
765
766         if (slice_param->num_ref_idx_active_override_flag)
767             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
768     } else if (slice_type == SLICE_TYPE_B) {
769         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
770         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
771         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
772
773         if (slice_param->num_ref_idx_active_override_flag) {
774             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
775             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
776         }
777
778         if (weighted_pred_idc == 2) {
779             /* 8.4.3 - Derivation process for prediction weights (8-279) */
780             luma_log2_weight_denom = 5;
781             chroma_log2_weight_denom = 5;
782         }
783     }
784
785     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
786     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
787
788     for (i = 0; i < 6; i++)
789         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
790
791     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
792         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
793     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
794         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
795
796     BEGIN_BCS_BATCH(batch, 11);;
797
798     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
799     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
800
801     OUT_BCS_BATCH(batch,
802                   (num_ref_l0 << 16) |
803                   (num_ref_l1 << 24) |
804                   (chroma_log2_weight_denom << 8) |
805                   (luma_log2_weight_denom << 0));
806
807     OUT_BCS_BATCH(batch, 
808                   (weighted_pred_idc << 30) |
809                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
810                   (slice_param->disable_deblocking_filter_idc << 27) |
811                   (slice_param->cabac_init_idc << 24) |
812                   (qp<<16) |                    /*Slice Quantization Parameter*/
813                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
814                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
815     OUT_BCS_BATCH(batch,
816                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
817                   (beginx << 16) |
818                   slice_param->macroblock_address );
819     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
820     OUT_BCS_BATCH(batch, 
821                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
822                   (1 << 30) |           /*ResetRateControlCounter*/
823                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
824                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
825                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
826                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
827                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
828                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
829                   (last_slice << 19) |     /*IsLastSlice*/
830                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
831                   (1 << 17) |       /*HeaderPresentFlag*/       
832                   (1 << 16) |       /*SliceData PresentFlag*/
833                   (1 << 15) |       /*TailPresentFlag*/
834                   (1 << 13) |       /*RBSP NAL TYPE*/   
835                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
836     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
837     OUT_BCS_BATCH(batch,
838                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
839                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
840                   (shrink << 8)  |
841                   (grow << 0));   
842     OUT_BCS_BATCH(batch,
843                   (correct[5] << 20) |
844                   (correct[4] << 16) |
845                   (correct[3] << 12) |
846                   (correct[2] << 8) |
847                   (correct[1] << 4) |
848                   (correct[0] << 0));
849     OUT_BCS_BATCH(batch, 0);
850
851     ADVANCE_BCS_BATCH(batch);
852 }
853
854
855 #ifdef MFC_SOFTWARE_HASWELL
856
857 static int
858 gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
859                               int qp,unsigned int *msg,
860                               struct intel_encoder_context *encoder_context,
861                               unsigned char target_mb_size, unsigned char max_mb_size,
862                               struct intel_batchbuffer *batch)
863 {
864     int len_in_dwords = 12;
865     unsigned int intra_msg;
866 #define         INTRA_MSG_FLAG          (1 << 13)
867 #define         INTRA_MBTYPE_MASK       (0x1F0000)
868     if (batch == NULL)
869         batch = encoder_context->base.batch;
870
871     BEGIN_BCS_BATCH(batch, len_in_dwords);
872
873     intra_msg = msg[0] & 0xC0FF;
874     intra_msg |= INTRA_MSG_FLAG;
875     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
876     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
877     OUT_BCS_BATCH(batch, 0);
878     OUT_BCS_BATCH(batch, 0);
879     OUT_BCS_BATCH(batch, 
880                   (0 << 24) |           /* PackedMvNum, Debug*/
881                   (0 << 20) |           /* No motion vector */
882                   (1 << 19) |           /* CbpDcY */
883                   (1 << 18) |           /* CbpDcU */
884                   (1 << 17) |           /* CbpDcV */
885                   intra_msg);
886
887     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
888     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
889     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
890
891     /*Stuff for Intra MB*/
892     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
893     OUT_BCS_BATCH(batch, msg[2]);       
894     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
895     
896     /*MaxSizeInWord and TargetSzieInWord*/
897     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
898                   (target_mb_size << 16) );
899
900     OUT_BCS_BATCH(batch, 0);
901
902     ADVANCE_BCS_BATCH(batch);
903
904     return len_in_dwords;
905 }
906
907 static int
908 gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
909                               unsigned int *msg, unsigned int offset,
910                               struct intel_encoder_context *encoder_context,
911                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
912                               struct intel_batchbuffer *batch)
913 {
914     struct gen6_vme_context *vme_context = encoder_context->vme_context;
915     int len_in_dwords = 12;
916     unsigned int inter_msg = 0;
917     if (batch == NULL)
918         batch = encoder_context->base.batch;
919     {
920 #define MSG_MV_OFFSET   4
921         unsigned int *mv_ptr;
922         mv_ptr = msg + MSG_MV_OFFSET;
923         /* MV of VME output is based on 16 sub-blocks. So it is necessary
924          * to convert them to be compatible with the format of AVC_PAK
925          * command.
926          */
927         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
928             /* MV[0] and MV[2] are replicated */
929             mv_ptr[4] = mv_ptr[0];
930             mv_ptr[5] = mv_ptr[1];
931             mv_ptr[2] = mv_ptr[8];
932             mv_ptr[3] = mv_ptr[9];
933             mv_ptr[6] = mv_ptr[8];
934             mv_ptr[7] = mv_ptr[9];
935         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
936             /* MV[0] and MV[1] are replicated */
937             mv_ptr[2] = mv_ptr[0];
938             mv_ptr[3] = mv_ptr[1];
939             mv_ptr[4] = mv_ptr[16];
940             mv_ptr[5] = mv_ptr[17];
941             mv_ptr[6] = mv_ptr[24];
942             mv_ptr[7] = mv_ptr[25];
943         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
944                    !(msg[1] & SUBMB_SHAPE_MASK)) {
945             /* Don't touch MV[0] or MV[1] */
946             mv_ptr[2] = mv_ptr[8];
947             mv_ptr[3] = mv_ptr[9];
948             mv_ptr[4] = mv_ptr[16];
949             mv_ptr[5] = mv_ptr[17];
950             mv_ptr[6] = mv_ptr[24];
951             mv_ptr[7] = mv_ptr[25];
952         }
953     }
954
955     BEGIN_BCS_BATCH(batch, len_in_dwords);
956
957     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
958
959     inter_msg = 32;
960     /* MV quantity */
961     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
962         if (msg[1] & SUBMB_SHAPE_MASK)
963             inter_msg = 128;
964     }
965     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
966     OUT_BCS_BATCH(batch, offset);
967     inter_msg = msg[0] & (0x1F00FFFF);
968     inter_msg |= INTER_MV8;
969     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
970     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
971         (msg[1] & SUBMB_SHAPE_MASK)) {
972         inter_msg |= INTER_MV32;
973     }
974
975     OUT_BCS_BATCH(batch, inter_msg);
976
977     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
978     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
979 #if 0 
980     if ( slice_type == SLICE_TYPE_B) {
981         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
982     } else {
983         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
984     }
985 #else
986     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
987 #endif
988
989     inter_msg = msg[1] >> 8;
990     /*Stuff for Inter MB*/
991     OUT_BCS_BATCH(batch, inter_msg);        
992     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
993     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
994
995     /*MaxSizeInWord and TargetSzieInWord*/
996     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
997                   (target_mb_size << 16) );
998
999     OUT_BCS_BATCH(batch, 0x0);    
1000
1001     ADVANCE_BCS_BATCH(batch);
1002
1003     return len_in_dwords;
1004 }
1005
1006 #define         AVC_INTRA_RDO_OFFSET    4
1007 #define         AVC_INTER_RDO_OFFSET    10
1008 #define         AVC_INTER_MSG_OFFSET    8       
1009 #define         AVC_INTER_MV_OFFSET             48
1010 #define         AVC_RDO_MASK            0xFFFF
1011
1012 static void 
1013 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1014                                        struct encode_state *encode_state,
1015                                        struct intel_encoder_context *encoder_context,
1016                                        int slice_index,
1017                                        struct intel_batchbuffer *slice_batch)
1018 {
1019     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1020     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1021     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1022     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1023     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1024     unsigned int *msg = NULL, offset = 0;
1025     unsigned char *msg_ptr = NULL;
1026     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1027     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1028     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1029     int i,x,y;
1030     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1031     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1032     unsigned int tail_data[] = { 0x0, 0x0 };
1033     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1034     int is_intra = slice_type == SLICE_TYPE_I;
1035     int qp_slice;
1036
1037     qp_slice = qp;
1038     if (rate_control_mode == VA_RC_CBR) {
1039         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1040         if (encode_state->slice_header_index[slice_index] == 0) {
1041             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1042             qp_slice = qp;
1043         }
1044     }
1045
1046     /* only support for 8-bit pixel bit-depth */
1047     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1048     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1049     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1050     assert(qp >= 0 && qp < 52);
1051
1052     gen8_mfc_avc_slice_state(ctx,
1053                              pPicParameter,
1054                              pSliceParameter,
1055                              encode_state, encoder_context,
1056                              (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
1057
1058     if ( slice_index == 0)
1059         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1060
1061     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1062
1063     dri_bo_map(vme_context->vme_output.bo , 1);
1064     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1065
1066     if (is_intra) {
1067         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1068     } else {
1069         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1070     }
1071    
1072     for (i = pSliceParameter->macroblock_address; 
1073          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1074         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1075         x = i % width_in_mbs;
1076         y = i / width_in_mbs;
1077         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1078
1079         if (is_intra) {
1080             assert(msg);
1081             gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1082         } else {
1083             int inter_rdo, intra_rdo;
1084             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1085             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1086             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1087             if (intra_rdo < inter_rdo) { 
1088                 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1089             } else {
1090                 msg += AVC_INTER_MSG_OFFSET;
1091                 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1092             }
1093         }
1094     }
1095    
1096     dri_bo_unmap(vme_context->vme_output.bo);
1097
1098     if ( last_slice ) {    
1099         mfc_context->insert_object(ctx, encoder_context,
1100                                    tail_data, 2, 8,
1101                                    2, 1, 1, 0, slice_batch);
1102     } else {
1103         mfc_context->insert_object(ctx, encoder_context,
1104                                    tail_data, 1, 8,
1105                                    1, 1, 1, 0, slice_batch);
1106     }
1107 }
1108
1109 static dri_bo *
1110 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1111                                   struct encode_state *encode_state,
1112                                   struct intel_encoder_context *encoder_context)
1113 {
1114     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1115     struct i965_driver_data *i965 = i965_driver_data(ctx);
1116     struct intel_batchbuffer *batch;
1117     dri_bo *batch_bo;
1118     int i;
1119
1120     batch = mfc_context->aux_batchbuffer;
1121     batch_bo = batch->buffer;
1122     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1123         gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1124     }
1125
1126     intel_batchbuffer_align(batch, 8);
1127     
1128     BEGIN_BCS_BATCH(batch, 2);
1129     OUT_BCS_BATCH(batch, 0);
1130     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1131     ADVANCE_BCS_BATCH(batch);
1132
1133     dri_bo_reference(batch_bo);
1134     intel_batchbuffer_free(batch);
1135     mfc_context->aux_batchbuffer = NULL;
1136
1137     return batch_bo;
1138 }
1139
1140 #else
1141
1142 static void
1143 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1144                                     struct encode_state *encode_state,
1145                                     struct intel_encoder_context *encoder_context)
1146
1147 {
1148     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1149     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1150
1151     assert(vme_context->vme_output.bo);
1152     mfc_context->buffer_suface_setup(ctx,
1153                                      &mfc_context->gpe_context,
1154                                      &vme_context->vme_output,
1155                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1156                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1157     assert(mfc_context->aux_batchbuffer_surface.bo);
1158     mfc_context->buffer_suface_setup(ctx,
1159                                      &mfc_context->gpe_context,
1160                                      &mfc_context->aux_batchbuffer_surface,
1161                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1162                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1163 }
1164
1165 static void
1166 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1167                                      struct encode_state *encode_state,
1168                                      struct intel_encoder_context *encoder_context)
1169
1170 {
1171     struct i965_driver_data *i965 = i965_driver_data(ctx);
1172     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1173     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1174     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1175     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1176     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1177     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1178     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1179     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1180                                                            "MFC batchbuffer",
1181                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1182                                                            0x1000);
1183     mfc_context->buffer_suface_setup(ctx,
1184                                      &mfc_context->gpe_context,
1185                                      &mfc_context->mfc_batchbuffer_surface,
1186                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1187                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1188 }
1189
1190 static void
1191 gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1192                                     struct encode_state *encode_state,
1193                                     struct intel_encoder_context *encoder_context)
1194 {
1195     gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1196     gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1197 }
1198
1199 static void
1200 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1201                                 struct encode_state *encode_state,
1202                                 struct intel_encoder_context *encoder_context)
1203 {
1204     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1205     struct gen6_interface_descriptor_data *desc;   
1206     int i;
1207     dri_bo *bo;
1208
1209     bo = mfc_context->gpe_context.idrt.bo;
1210     dri_bo_map(bo, 1);
1211     assert(bo->virtual);
1212     desc = bo->virtual;
1213
1214     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1215         struct i965_kernel *kernel;
1216
1217         kernel = &mfc_context->gpe_context.kernels[i];
1218         assert(sizeof(*desc) == 32);
1219
1220         /*Setup the descritor table*/
1221         memset(desc, 0, sizeof(*desc));
1222         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1223         desc->desc2.sampler_count = 0;
1224         desc->desc2.sampler_state_pointer = 0;
1225         desc->desc3.binding_table_entry_count = 2;
1226         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1227         desc->desc4.constant_urb_entry_read_offset = 0;
1228         desc->desc4.constant_urb_entry_read_length = 4;
1229                 
1230         /*kernel start*/
1231         dri_bo_emit_reloc(bo,   
1232                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1233                           0,
1234                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1235                           kernel->bo);
1236         desc++;
1237     }
1238
1239     dri_bo_unmap(bo);
1240 }
1241
1242 static void
1243 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1244                                     struct encode_state *encode_state,
1245                                     struct intel_encoder_context *encoder_context)
1246 {
1247     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1248     
1249     (void)mfc_context;
1250 }
1251
1252 static void
1253 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1254                                          int index,
1255                                          int head_offset,
1256                                          int batchbuffer_offset,
1257                                          int head_size,
1258                                          int tail_size,
1259                                          int number_mb_cmds,
1260                                          int first_object,
1261                                          int last_object,
1262                                          int last_slice,
1263                                          int mb_x,
1264                                          int mb_y,
1265                                          int width_in_mbs,
1266                                          int qp)
1267 {
1268     BEGIN_BATCH(batch, 12);
1269     
1270     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1271     OUT_BATCH(batch, index);
1272     OUT_BATCH(batch, 0);
1273     OUT_BATCH(batch, 0);
1274     OUT_BATCH(batch, 0);
1275     OUT_BATCH(batch, 0);
1276    
1277     /*inline data */
1278     OUT_BATCH(batch, head_offset);
1279     OUT_BATCH(batch, batchbuffer_offset);
1280     OUT_BATCH(batch, 
1281               head_size << 16 |
1282               tail_size);
1283     OUT_BATCH(batch,
1284               number_mb_cmds << 16 |
1285               first_object << 2 |
1286               last_object << 1 |
1287               last_slice);
1288     OUT_BATCH(batch,
1289               mb_y << 8 |
1290               mb_x);
1291     OUT_BATCH(batch,
1292               qp << 16 |
1293               width_in_mbs);
1294
1295     ADVANCE_BATCH(batch);
1296 }
1297
1298 static void
1299 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1300                                        struct intel_encoder_context *encoder_context,
1301                                        VAEncSliceParameterBufferH264 *slice_param,
1302                                        int head_offset,
1303                                        unsigned short head_size,
1304                                        unsigned short tail_size,
1305                                        int batchbuffer_offset,
1306                                        int qp,
1307                                        int last_slice)
1308 {
1309     struct intel_batchbuffer *batch = encoder_context->base.batch;
1310     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1311     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1312     int total_mbs = slice_param->num_macroblocks;
1313     int number_mb_cmds = 128;
1314     int starting_mb = 0;
1315     int last_object = 0;
1316     int first_object = 1;
1317     int i;
1318     int mb_x, mb_y;
1319     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1320
1321     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1322         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1323         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1324         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1325         assert(mb_x <= 255 && mb_y <= 255);
1326
1327         starting_mb += number_mb_cmds;
1328
1329         gen8_mfc_batchbuffer_emit_object_command(batch,
1330                                                  index,
1331                                                  head_offset,
1332                                                  batchbuffer_offset,
1333                                                  head_size,
1334                                                  tail_size,
1335                                                  number_mb_cmds,
1336                                                  first_object,
1337                                                  last_object,
1338                                                  last_slice,
1339                                                  mb_x,
1340                                                  mb_y,
1341                                                  width_in_mbs,
1342                                                  qp);
1343
1344         if (first_object) {
1345             head_offset += head_size;
1346             batchbuffer_offset += head_size;
1347         }
1348
1349         if (last_object) {
1350             head_offset += tail_size;
1351             batchbuffer_offset += tail_size;
1352         }
1353
1354         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1355
1356         first_object = 0;
1357     }
1358
1359     if (!last_object) {
1360         last_object = 1;
1361         number_mb_cmds = total_mbs % number_mb_cmds;
1362         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1363         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1364         assert(mb_x <= 255 && mb_y <= 255);
1365         starting_mb += number_mb_cmds;
1366
1367         gen8_mfc_batchbuffer_emit_object_command(batch,
1368                                                  index,
1369                                                  head_offset,
1370                                                  batchbuffer_offset,
1371                                                  head_size,
1372                                                  tail_size,
1373                                                  number_mb_cmds,
1374                                                  first_object,
1375                                                  last_object,
1376                                                  last_slice,
1377                                                  mb_x,
1378                                                  mb_y,
1379                                                  width_in_mbs,
1380                                                  qp);
1381     }
1382 }
1383                           
1384 /*
1385  * return size in Owords (16bytes)
1386  */         
1387 static int
1388 gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1389                                struct encode_state *encode_state,
1390                                struct intel_encoder_context *encoder_context,
1391                                int slice_index,
1392                                int batchbuffer_offset)
1393 {
1394     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1395     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1396     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1397     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1398     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1399     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1400     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1401     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1402     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1403     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1404     unsigned int tail_data[] = { 0x0, 0x0 };
1405     long head_offset;
1406     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1407     unsigned short head_size, tail_size;
1408     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1409     int qp_slice;
1410
1411     qp_slice = qp;
1412     if (rate_control_mode == VA_RC_CBR) {
1413         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1414         if (encode_state->slice_header_index[slice_index] == 0) {
1415             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1416             qp_slice = qp;
1417         }
1418     }
1419
1420     /* only support for 8-bit pixel bit-depth */
1421     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1422     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1423     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1424     assert(qp >= 0 && qp < 52);
1425
1426     head_offset = old_used / 16;
1427     gen8_mfc_avc_slice_state(ctx,
1428                              pPicParameter,
1429                              pSliceParameter,
1430                              encode_state,
1431                              encoder_context,
1432                              (rate_control_mode == VA_RC_CBR),
1433                              qp_slice,
1434                              slice_batch);
1435
1436     if (slice_index == 0)
1437         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1438
1439     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1440
1441     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1442     used = intel_batchbuffer_used_size(slice_batch);
1443     head_size = (used - old_used) / 16;
1444     old_used = used;
1445
1446     /* tail */
1447     if (last_slice) {    
1448         mfc_context->insert_object(ctx,
1449                                    encoder_context,
1450                                    tail_data,
1451                                    2,
1452                                    8,
1453                                    2,
1454                                    1,
1455                                    1,
1456                                    0,
1457                                    slice_batch);
1458     } else {
1459         mfc_context->insert_object(ctx,
1460                                    encoder_context,
1461                                    tail_data,
1462                                    1,
1463                                    8,
1464                                    1,
1465                                    1,
1466                                    1,
1467                                    0,
1468                                    slice_batch);
1469     }
1470
1471     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1472     used = intel_batchbuffer_used_size(slice_batch);
1473     tail_size = (used - old_used) / 16;
1474
1475    
1476     gen8_mfc_avc_batchbuffer_slice_command(ctx,
1477                                            encoder_context,
1478                                            pSliceParameter,
1479                                            head_offset,
1480                                            head_size,
1481                                            tail_size,
1482                                            batchbuffer_offset,
1483                                            qp,
1484                                            last_slice);
1485
1486     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1487 }
1488
1489 static void
1490 gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1491                                   struct encode_state *encode_state,
1492                                   struct intel_encoder_context *encoder_context)
1493 {
1494     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1495     struct intel_batchbuffer *batch = encoder_context->base.batch;
1496     int i, size, offset = 0;
1497     intel_batchbuffer_start_atomic(batch, 0x4000); 
1498     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1499
1500     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1501         size = gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1502         offset += size;
1503     }
1504
1505     intel_batchbuffer_end_atomic(batch);
1506     intel_batchbuffer_flush(batch);
1507 }
1508
1509 static void
1510 gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1511                                struct encode_state *encode_state,
1512                                struct intel_encoder_context *encoder_context)
1513 {
1514     gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1515     gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1516     gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1517     gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1518 }
1519
1520 static dri_bo *
1521 gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1522                                   struct encode_state *encode_state,
1523                                   struct intel_encoder_context *encoder_context)
1524 {
1525     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1526
1527     gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1528     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1529
1530     return mfc_context->mfc_batchbuffer_surface.bo;
1531 }
1532
1533 #endif
1534
1535 static void
1536 gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
1537                                  struct encode_state *encode_state,
1538                                  struct intel_encoder_context *encoder_context)
1539 {
1540     struct intel_batchbuffer *batch = encoder_context->base.batch;
1541     dri_bo *slice_batch_bo;
1542
1543     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1544         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1545         assert(0);
1546         return; 
1547     }
1548
1549 #ifdef MFC_SOFTWARE_HASWELL
1550     slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1551 #else
1552     slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1553 #endif
1554
1555     // begin programing
1556     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1557     intel_batchbuffer_emit_mi_flush(batch);
1558     
1559     // picture level programing
1560     gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1561
1562     BEGIN_BCS_BATCH(batch, 3);
1563     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1564     OUT_BCS_RELOC(batch,
1565                   slice_batch_bo,
1566                   I915_GEM_DOMAIN_COMMAND, 0, 
1567                   0);
1568     OUT_BCS_BATCH(batch, 0);
1569     ADVANCE_BCS_BATCH(batch);
1570
1571     // end programing
1572     intel_batchbuffer_end_atomic(batch);
1573
1574     dri_bo_unreference(slice_batch_bo);
1575 }
1576
1577
1578 static VAStatus
1579 gen8_mfc_avc_encode_picture(VADriverContextP ctx, 
1580                             struct encode_state *encode_state,
1581                             struct intel_encoder_context *encoder_context)
1582 {
1583     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1584     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1585     int current_frame_bits_size;
1586     int sts;
1587  
1588     for (;;) {
1589         gen8_mfc_init(ctx, encode_state, encoder_context);
1590         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1591         /*Programing bcs pipeline*/
1592         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1593         gen8_mfc_run(ctx, encode_state, encoder_context);
1594         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1595             gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1596             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1597             if (sts == BRC_NO_HRD_VIOLATION) {
1598                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1599                 break;
1600             }
1601             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1602                 if (!mfc_context->hrd.violation_noted) {
1603                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1604                     mfc_context->hrd.violation_noted = 1;
1605                 }
1606                 return VA_STATUS_SUCCESS;
1607             }
1608         } else {
1609             break;
1610         }
1611     }
1612
1613     return VA_STATUS_SUCCESS;
1614 }
1615
1616 /*
1617  * MPEG-2
1618  */
1619
/*
 * Picture type value written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the MPEG-2 picture parameter buffer.
 */
static const int
va_to_gen8_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1626
1627 static void
1628 gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
1629                          struct intel_encoder_context *encoder_context,
1630                          struct encode_state *encode_state)
1631 {
1632     struct intel_batchbuffer *batch = encoder_context->base.batch;
1633     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1634     VAEncPictureParameterBufferMPEG2 *pic_param;
1635     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1636     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1637     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1638
1639     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1640     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1641     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1642
1643     BEGIN_BCS_BATCH(batch, 13);
1644     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1645     OUT_BCS_BATCH(batch,
1646                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1647                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1648                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1649                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1650                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1651                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1652                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1653                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1654                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1655                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1656                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1657                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1658     OUT_BCS_BATCH(batch,
1659                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1660                   va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
1661                   0);
1662     OUT_BCS_BATCH(batch,
1663                   1 << 31 |     /* slice concealment */
1664                   (height_in_mbs - 1) << 16 |
1665                   (width_in_mbs - 1));
1666
1667     if (slice_param && slice_param->quantiser_scale_code >= 14)
1668         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1669     else
1670         OUT_BCS_BATCH(batch, 0);
1671
1672     OUT_BCS_BATCH(batch, 0);
1673     OUT_BCS_BATCH(batch,
1674                   0xFFF << 16 | /* InterMBMaxSize */
1675                   0xFFF << 0 |  /* IntraMBMaxSize */
1676                   0);
1677     OUT_BCS_BATCH(batch, 0);
1678     OUT_BCS_BATCH(batch, 0);
1679     OUT_BCS_BATCH(batch, 0);
1680     OUT_BCS_BATCH(batch, 0);
1681     OUT_BCS_BATCH(batch, 0);
1682     OUT_BCS_BATCH(batch, 0);
1683     ADVANCE_BCS_BATCH(batch);
1684 }
1685
1686 static void
1687 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1688 {
1689     unsigned char intra_qm[64] = {
1690         8, 16, 19, 22, 26, 27, 29, 34,
1691         16, 16, 22, 24, 27, 29, 34, 37,
1692         19, 22, 26, 27, 29, 34, 34, 38,
1693         22, 22, 26, 27, 29, 34, 37, 40,
1694         22, 26, 27, 29, 32, 35, 40, 48,
1695         26, 27, 29, 32, 35, 40, 48, 58,
1696         26, 27, 29, 34, 38, 46, 56, 69,
1697         27, 29, 35, 38, 46, 56, 69, 83
1698     };
1699
1700     unsigned char non_intra_qm[64] = {
1701         16, 16, 16, 16, 16, 16, 16, 16,
1702         16, 16, 16, 16, 16, 16, 16, 16,
1703         16, 16, 16, 16, 16, 16, 16, 16,
1704         16, 16, 16, 16, 16, 16, 16, 16,
1705         16, 16, 16, 16, 16, 16, 16, 16,
1706         16, 16, 16, 16, 16, 16, 16, 16,
1707         16, 16, 16, 16, 16, 16, 16, 16,
1708         16, 16, 16, 16, 16, 16, 16, 16
1709     };
1710
1711     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1712     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1713 }
1714
1715 static void
1716 gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1717 {
1718     unsigned short intra_fqm[64] = {
1719         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1720         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1721         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1722         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1723         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1724         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1725         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1726         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1727     };
1728
1729     unsigned short non_intra_fqm[64] = {
1730         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1731         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1732         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1733         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1734         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1735         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1736         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1737         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1738     };
1739
1740     gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1741     gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1742 }
1743
1744 static void
1745 gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1746                                 struct intel_encoder_context *encoder_context,
1747                                 int x, int y,
1748                                 int next_x, int next_y,
1749                                 int is_fisrt_slice_group,
1750                                 int is_last_slice_group,
1751                                 int intra_slice,
1752                                 int qp,
1753                                 struct intel_batchbuffer *batch)
1754 {
1755     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1756
1757     if (batch == NULL)
1758         batch = encoder_context->base.batch;
1759
1760     BEGIN_BCS_BATCH(batch, 8);
1761
1762     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1763     OUT_BCS_BATCH(batch,
1764                   0 << 31 |                             /* MbRateCtrlFlag */
1765                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1766                   1 << 17 |                             /* Insert Header before the first slice group data */
1767                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1768                   1 << 15 |                             /* TailPresentFlag: always 1 */
1769                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1770                   !!intra_slice << 13 |                 /* IntraSlice */
1771                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1772                   0);
1773     OUT_BCS_BATCH(batch,
1774                   next_y << 24 |
1775                   next_x << 16 |
1776                   y << 8 |
1777                   x << 0 |
1778                   0);
1779     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1780     /* bitstream pointer is only loaded once for the first slice of a frame when 
1781      * LoadSlicePointerFlag is 0
1782      */
1783     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1784     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1785     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1786     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1787
1788     ADVANCE_BCS_BATCH(batch);
1789 }
1790
1791 static int
1792 gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1793                                 struct intel_encoder_context *encoder_context,
1794                                 int x, int y,
1795                                 int first_mb_in_slice,
1796                                 int last_mb_in_slice,
1797                                 int first_mb_in_slice_group,
1798                                 int last_mb_in_slice_group,
1799                                 int mb_type,
1800                                 int qp_scale_code,
1801                                 int coded_block_pattern,
1802                                 unsigned char target_size_in_word,
1803                                 unsigned char max_size_in_word,
1804                                 struct intel_batchbuffer *batch)
1805 {
1806     int len_in_dwords = 9;
1807
1808     if (batch == NULL)
1809         batch = encoder_context->base.batch;
1810
1811     BEGIN_BCS_BATCH(batch, len_in_dwords);
1812
1813     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1814     OUT_BCS_BATCH(batch,
1815                   0 << 24 |     /* PackedMvNum */
1816                   0 << 20 |     /* MvFormat */
1817                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1818                   0 << 15 |     /* TransformFlag: frame DCT */
1819                   0 << 14 |     /* FieldMbFlag */
1820                   1 << 13 |     /* IntraMbFlag */
1821                   mb_type << 8 |   /* MbType: Intra */
1822                   0 << 2 |      /* SkipMbFlag */
1823                   0 << 0 |      /* InterMbMode */
1824                   0);
1825     OUT_BCS_BATCH(batch, y << 16 | x);
1826     OUT_BCS_BATCH(batch,
1827                   max_size_in_word << 24 |
1828                   target_size_in_word << 16 |
1829                   coded_block_pattern << 6 |      /* CBP */
1830                   0);
1831     OUT_BCS_BATCH(batch,
1832                   last_mb_in_slice << 31 |
1833                   first_mb_in_slice << 30 |
1834                   0 << 27 |     /* EnableCoeffClamp */
1835                   last_mb_in_slice_group << 26 |
1836                   0 << 25 |     /* MbSkipConvDisable */
1837                   first_mb_in_slice_group << 24 |
1838                   0 << 16 |     /* MvFieldSelect */
1839                   qp_scale_code << 0 |
1840                   0);
1841     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1842     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1843     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1844     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1845
1846     ADVANCE_BCS_BATCH(batch);
1847
1848     return len_in_dwords;
1849 }
1850
/* Byte offset of the motion vectors inside one VME output message */
1852 #define MPEG2_INTER_MV_OFFSET   48 
1853
/*
 * Allowed motion vector range for each f_code value; the table is indexed
 * by f_code.  Entry 0 is a placeholder and is never used for clamping.
 */
static const struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {0, 0},
    {-16, 15},
    {-32, 31},
    {-64, 63},
    {-128, 127},
    {-256, 255},
    {-512, 511},
    {-1024, 1023},
    {-2048, 2047},
    {-4096, 4095}
};

/*
 * Sanitise one motion vector component.
 *
 * mv          component in half-pixel units
 * pos         macroblock index along the same axis (16 pixels per
 *             macroblock, hence 32 half-pixels)
 * display_max picture size along that axis, in pixels
 * f_code      selects the entry of mv_ranges[] to clamp against
 *
 * A vector that would move the macroblock outside [0, display_max] is
 * replaced by 0.  The result is then clamped to mv_ranges[f_code]; f_code
 * values without a usable table entry (0, negative, or past the end of the
 * table) leave it unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int num_ranges = (int)(sizeof(mv_ranges) / sizeof(mv_ranges[0]));

    if (mv + pos * 16 * 2 < 0 ||
        mv + (pos + 1) * 16 * 2 > display_max * 2) {
        mv = 0;
    }

    if (f_code > 0 && f_code < num_ranges) {
        if (mv < mv_ranges[f_code].low) {
            mv = mv_ranges[f_code].low;
        }

        if (mv > mv_ranges[f_code].high) {
            mv = mv_ranges[f_code].high;
        }
    }

    return mv;
}
1888
1889 static int
1890 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1891                                 struct encode_state *encode_state,
1892                                 struct intel_encoder_context *encoder_context,
1893                                 unsigned int *msg,
1894                                 int width_in_mbs, int height_in_mbs,
1895                                 int x, int y,
1896                                 int first_mb_in_slice,
1897                                 int last_mb_in_slice,
1898                                 int first_mb_in_slice_group,
1899                                 int last_mb_in_slice_group,
1900                                 int qp_scale_code,
1901                                 unsigned char target_size_in_word,
1902                                 unsigned char max_size_in_word,
1903                                 struct intel_batchbuffer *batch)
1904 {
1905     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1906     int len_in_dwords = 9;
1907     short *mvptr, mvx0, mvy0, mvx1, mvy1;
1908     
1909     if (batch == NULL)
1910         batch = encoder_context->base.batch;
1911
1912     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
1913     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1914     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1915     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1916     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1917
1918     BEGIN_BCS_BATCH(batch, len_in_dwords);
1919
1920     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1921     OUT_BCS_BATCH(batch,
1922                   2 << 24 |     /* PackedMvNum */
1923                   7 << 20 |     /* MvFormat */
1924                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1925                   0 << 15 |     /* TransformFlag: frame DCT */
1926                   0 << 14 |     /* FieldMbFlag */
1927                   0 << 13 |     /* IntraMbFlag */
1928                   1 << 8 |      /* MbType: Frame-based */
1929                   0 << 2 |      /* SkipMbFlag */
1930                   0 << 0 |      /* InterMbMode */
1931                   0);
1932     OUT_BCS_BATCH(batch, y << 16 | x);
1933     OUT_BCS_BATCH(batch,
1934                   max_size_in_word << 24 |
1935                   target_size_in_word << 16 |
1936                   0x3f << 6 |   /* CBP */
1937                   0);
1938     OUT_BCS_BATCH(batch,
1939                   last_mb_in_slice << 31 |
1940                   first_mb_in_slice << 30 |
1941                   0 << 27 |     /* EnableCoeffClamp */
1942                   last_mb_in_slice_group << 26 |
1943                   0 << 25 |     /* MbSkipConvDisable */
1944                   first_mb_in_slice_group << 24 |
1945                   0 << 16 |     /* MvFieldSelect */
1946                   qp_scale_code << 0 |
1947                   0);
1948
1949     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
1950     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
1951     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1952     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1953
1954     ADVANCE_BCS_BATCH(batch);
1955
1956     return len_in_dwords;
1957 }
1958
1959 static void
1960 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1961                                            struct encode_state *encode_state,
1962                                            struct intel_encoder_context *encoder_context,
1963                                            struct intel_batchbuffer *slice_batch)
1964 {
1965     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1966     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1967
1968     if (encode_state->packed_header_data[idx]) {
1969         VAEncPackedHeaderParameterBuffer *param = NULL;
1970         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1971         unsigned int length_in_bits;
1972
1973         assert(encode_state->packed_header_param[idx]);
1974         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1975         length_in_bits = param->bit_length;
1976
1977         mfc_context->insert_object(ctx,
1978                                    encoder_context,
1979                                    header_data,
1980                                    ALIGN(length_in_bits, 32) >> 5,
1981                                    length_in_bits & 0x1f,
1982                                    5,   /* FIXME: check it */
1983                                    0,
1984                                    0,
1985                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
1986                                    slice_batch);
1987     }
1988
1989     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
1990
1991     if (encode_state->packed_header_data[idx]) {
1992         VAEncPackedHeaderParameterBuffer *param = NULL;
1993         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1994         unsigned int length_in_bits;
1995
1996         assert(encode_state->packed_header_param[idx]);
1997         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1998         length_in_bits = param->bit_length;
1999
2000         mfc_context->insert_object(ctx,
2001                                    encoder_context,
2002                                    header_data,
2003                                    ALIGN(length_in_bits, 32) >> 5,
2004                                    length_in_bits & 0x1f,
2005                                    5,   /* FIXME: check it */
2006                                    0,
2007                                    0,
2008                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2009                                    slice_batch);
2010     }
2011 }
2012
2013 static void 
2014 gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2015                                     struct encode_state *encode_state,
2016                                     struct intel_encoder_context *encoder_context,
2017                                     int slice_index,
2018                                     VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2019                                     struct intel_batchbuffer *slice_batch)
2020 {
2021     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2022     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2023     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2024     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2025     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2026     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2027     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2028     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2029     int i, j;
2030     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2031     unsigned int *msg = NULL;
2032     unsigned char *msg_ptr = NULL;
2033
2034     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2035     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2036     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2037     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2038
2039     dri_bo_map(vme_context->vme_output.bo , 0);
2040     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2041
2042     if (next_slice_group_param) {
2043         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2044         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2045     } else {
2046         h_next_start_pos = 0;
2047         v_next_start_pos = height_in_mbs;
2048     }
2049
2050     gen8_mfc_mpeg2_slicegroup_state(ctx,
2051                                     encoder_context,
2052                                     h_start_pos,
2053                                     v_start_pos,
2054                                     h_next_start_pos,
2055                                     v_next_start_pos,
2056                                     slice_index == 0,
2057                                     next_slice_group_param == NULL,
2058                                     slice_param->is_intra_slice,
2059                                     slice_param->quantiser_scale_code,
2060                                     slice_batch);
2061
2062     if (slice_index == 0) 
2063         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2064
2065     /* Insert '00' to make sure the header is valid */
2066     mfc_context->insert_object(ctx,
2067                                encoder_context,
2068                                (unsigned int*)section_delimiter,
2069                                1,
2070                                8,   /* 8bits in the last DWORD */
2071                                1,   /* 1 byte */
2072                                1,
2073                                0,
2074                                0,
2075                                slice_batch);
2076
2077     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2078         /* PAK for each macroblocks */
2079         for (j = 0; j < slice_param->num_macroblocks; j++) {
2080             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2081             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2082             int first_mb_in_slice = (j == 0);
2083             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2084             int first_mb_in_slice_group = (i == 0 && j == 0);
2085             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2086                                           j == slice_param->num_macroblocks - 1);
2087
2088             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2089
2090             if (slice_param->is_intra_slice) {
2091                 gen8_mfc_mpeg2_pak_object_intra(ctx,
2092                                                 encoder_context,
2093                                                 h_pos, v_pos,
2094                                                 first_mb_in_slice,
2095                                                 last_mb_in_slice,
2096                                                 first_mb_in_slice_group,
2097                                                 last_mb_in_slice_group,
2098                                                 0x1a,
2099                                                 slice_param->quantiser_scale_code,
2100                                                 0x3f,
2101                                                 0,
2102                                                 0xff,
2103                                                 slice_batch);
2104             } else {
2105                 int inter_rdo, intra_rdo;
2106                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2107                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2108
2109                 if (intra_rdo < inter_rdo) 
2110                     gen8_mfc_mpeg2_pak_object_intra(ctx,
2111                                                      encoder_context,
2112                                                      h_pos, v_pos,
2113                                                      first_mb_in_slice,
2114                                                      last_mb_in_slice,
2115                                                      first_mb_in_slice_group,
2116                                                      last_mb_in_slice_group,
2117                                                      0x1a,
2118                                                      slice_param->quantiser_scale_code,
2119                                                      0x3f,
2120                                                      0,
2121                                                      0xff,
2122                                                      slice_batch);
2123                 else
2124                     gen8_mfc_mpeg2_pak_object_inter(ctx,
2125                                                 encode_state,
2126                                                 encoder_context,
2127                                                 msg,
2128                                                 width_in_mbs, height_in_mbs,
2129                                                 h_pos, v_pos,
2130                                                 first_mb_in_slice,
2131                                                 last_mb_in_slice,
2132                                                 first_mb_in_slice_group,
2133                                                 last_mb_in_slice_group,
2134                                                 slice_param->quantiser_scale_code,
2135                                                 0,
2136                                                 0xff,
2137                                                 slice_batch);
2138             }
2139         }
2140
2141         slice_param++;
2142     }
2143
2144     dri_bo_unmap(vme_context->vme_output.bo);
2145
2146     /* tail data */
2147     if (next_slice_group_param == NULL) { /* end of a picture */
2148         mfc_context->insert_object(ctx,
2149                                    encoder_context,
2150                                    (unsigned int *)tail_delimiter,
2151                                    2,
2152                                    8,   /* 8bits in the last DWORD */
2153                                    5,   /* 5 bytes */
2154                                    1,
2155                                    1,
2156                                    0,
2157                                    slice_batch);
2158     } else {        /* end of a lsice group */
2159         mfc_context->insert_object(ctx,
2160                                    encoder_context,
2161                                    (unsigned int *)section_delimiter,
2162                                    1,
2163                                    8,   /* 8bits in the last DWORD */
2164                                    1,   /* 1 byte */
2165                                    1,
2166                                    1,
2167                                    0,
2168                                    slice_batch);
2169     }
2170 }
2171
2172 /* 
2173  * A batch buffer for all slices, including slice state, 
2174  * slice insert object and slice pak object commands
2175  *
2176  */
2177 static dri_bo *
2178 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2179                                           struct encode_state *encode_state,
2180                                           struct intel_encoder_context *encoder_context)
2181 {
2182     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2183     struct i965_driver_data *i965 = i965_driver_data(ctx);
2184     struct intel_batchbuffer *batch;
2185     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2186     dri_bo *batch_bo;
2187     int i;
2188
2189     batch = mfc_context->aux_batchbuffer;
2190     batch_bo = batch->buffer;
2191
2192     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2193         if (i == encode_state->num_slice_params_ext - 1)
2194             next_slice_group_param = NULL;
2195         else
2196             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2197
2198         gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2199     }
2200
2201     intel_batchbuffer_align(batch, 8);
2202     
2203     BEGIN_BCS_BATCH(batch, 2);
2204     OUT_BCS_BATCH(batch, 0);
2205     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2206     ADVANCE_BCS_BATCH(batch);
2207
2208     dri_bo_reference(batch_bo);
2209     intel_batchbuffer_free(batch);
2210     mfc_context->aux_batchbuffer = NULL;
2211
2212     return batch_bo;
2213 }
2214
2215 static void
2216 gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2217                                            struct encode_state *encode_state,
2218                                            struct intel_encoder_context *encoder_context)
2219 {
2220     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2221
2222     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2223     mfc_context->set_surface_state(ctx, encoder_context);
2224     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2225     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
2226     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2227     gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2228     gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
2229     gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
2230 }
2231
2232 static void
2233 gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2234                                    struct encode_state *encode_state,
2235                                    struct intel_encoder_context *encoder_context)
2236 {
2237     struct intel_batchbuffer *batch = encoder_context->base.batch;
2238     dri_bo *slice_batch_bo;
2239
2240     slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2241
2242     // begin programing
2243     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2244     intel_batchbuffer_emit_mi_flush(batch);
2245     
2246     // picture level programing
2247     gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2248
2249     BEGIN_BCS_BATCH(batch, 4);
2250     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2251     OUT_BCS_RELOC(batch,
2252                   slice_batch_bo,
2253                   I915_GEM_DOMAIN_COMMAND, 0, 
2254                   0);
2255     OUT_BCS_BATCH(batch, 0);
2256     OUT_BCS_BATCH(batch, 0);
2257     ADVANCE_BCS_BATCH(batch);
2258
2259     // end programing
2260     intel_batchbuffer_end_atomic(batch);
2261
2262     dri_bo_unreference(slice_batch_bo);
2263 }
2264
2265 static VAStatus
2266 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2267                         struct encode_state *encode_state,
2268                         struct intel_encoder_context *encoder_context)
2269 {
2270     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2271     struct object_surface *obj_surface; 
2272     struct object_buffer *obj_buffer;
2273     struct i965_coded_buffer_segment *coded_buffer_segment;
2274     VAStatus vaStatus = VA_STATUS_SUCCESS;
2275     dri_bo *bo;
2276     int i;
2277
2278     /* reconstructed surface */
2279     obj_surface = encode_state->reconstructed_object;
2280     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2281     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2282     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2283     mfc_context->surface_state.width = obj_surface->orig_width;
2284     mfc_context->surface_state.height = obj_surface->orig_height;
2285     mfc_context->surface_state.w_pitch = obj_surface->width;
2286     mfc_context->surface_state.h_pitch = obj_surface->height;
2287
2288     /* forward reference */
2289     obj_surface = encode_state->reference_objects[0];
2290
2291     if (obj_surface && obj_surface->bo) {
2292         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2293         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2294     } else
2295         mfc_context->reference_surfaces[0].bo = NULL;
2296
2297     /* backward reference */
2298     obj_surface = encode_state->reference_objects[1];
2299
2300     if (obj_surface && obj_surface->bo) {
2301         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2302         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2303     } else {
2304         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2305
2306         if (mfc_context->reference_surfaces[1].bo)
2307             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2308     }
2309
2310     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2311         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2312
2313         if (mfc_context->reference_surfaces[i].bo)
2314             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2315     }
2316     
2317     /* input YUV surface */
2318     obj_surface = encode_state->input_yuv_object;
2319     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2320     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2321
2322     /* coded buffer */
2323     obj_buffer = encode_state->coded_buf_object;
2324     bo = obj_buffer->buffer_store->bo;
2325     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2326     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2327     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2328     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2329
2330     /* set the internal flag to 0 to indicate the coded size is unknown */
2331     dri_bo_map(bo, 1);
2332     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2333     coded_buffer_segment->mapped = 0;
2334     coded_buffer_segment->codec = encoder_context->codec;
2335     dri_bo_unmap(bo);
2336
2337     return vaStatus;
2338 }
2339
2340 static VAStatus
2341 gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2342                               struct encode_state *encode_state,
2343                               struct intel_encoder_context *encoder_context)
2344 {
2345     gen8_mfc_init(ctx, encode_state, encoder_context);
2346     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2347     /*Programing bcs pipeline*/
2348     gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2349     gen8_mfc_run(ctx, encode_state, encoder_context);
2350
2351     return VA_STATUS_SUCCESS;
2352 }
2353
2354 static void
2355 gen8_mfc_context_destroy(void *context)
2356 {
2357     struct gen6_mfc_context *mfc_context = context;
2358     int i;
2359
2360     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2361     mfc_context->post_deblocking_output.bo = NULL;
2362
2363     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2364     mfc_context->pre_deblocking_output.bo = NULL;
2365
2366     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2367     mfc_context->uncompressed_picture_source.bo = NULL;
2368
2369     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2370     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2371
2372     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2373         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2374         mfc_context->direct_mv_buffers[i].bo = NULL;
2375     }
2376
2377     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2378     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2379
2380     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2381     mfc_context->macroblock_status_buffer.bo = NULL;
2382
2383     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2384     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2385
2386     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2387     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2388
2389
2390     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2391         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2392         mfc_context->reference_surfaces[i].bo = NULL;  
2393     }
2394
2395     i965_gpe_context_destroy(&mfc_context->gpe_context);
2396
2397     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2398     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2399
2400     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2401     mfc_context->aux_batchbuffer_surface.bo = NULL;
2402
2403     if (mfc_context->aux_batchbuffer)
2404         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2405
2406     mfc_context->aux_batchbuffer = NULL;
2407
2408     free(mfc_context);
2409 }
2410
2411 static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
2412                                   VAProfile profile,
2413                                   struct encode_state *encode_state,
2414                                   struct intel_encoder_context *encoder_context)
2415 {
2416     VAStatus vaStatus;
2417
2418     switch (profile) {
2419     case VAProfileH264ConstrainedBaseline:
2420     case VAProfileH264Main:
2421     case VAProfileH264High:
2422     case VAProfileH264MultiviewHigh:
2423     case VAProfileH264StereoHigh:
2424         vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2425         break;
2426
2427         /* FIXME: add for other profile */
2428     case VAProfileMPEG2Simple:
2429     case VAProfileMPEG2Main:
2430         vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2431         break;
2432
2433     default:
2434         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2435         break;
2436     }
2437
2438     return vaStatus;
2439 }
2440
2441 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2442 {
2443     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2444
2445     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2446
2447     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2448     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2449
2450     mfc_context->gpe_context.curbe.length = 32 * 4;
2451
2452     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2453     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2454     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2455     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2456     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2457
2458     i965_gpe_load_kernels(ctx,
2459                           &mfc_context->gpe_context,
2460                           gen8_mfc_kernels,
2461                           NUM_MFC_KERNEL);
2462
2463     mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
2464     mfc_context->set_surface_state = gen8_mfc_surface_state;
2465     mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
2466     mfc_context->avc_img_state = gen8_mfc_avc_img_state;
2467     mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
2468     mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
2469     mfc_context->insert_object = gen8_mfc_avc_insert_object;
2470     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2471
2472     encoder_context->mfc_context = mfc_context;
2473     encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
2474     encoder_context->mfc_pipeline = gen8_mfc_pipeline;
2475     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2476
2477     return True;
2478 }