VEBOX/bdw: set downsample method
[platform/upstream/libva-intel-driver.git] / src / gen8_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* Size of one padded surface-state slot; aliased to the GEN8 value. */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
/* Byte offset of surface-state slot `index` (argument parenthesized so
 * expressions such as `a + b` expand correctly). */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Byte offset of binding-table entry `index`; the table is placed after
 * MAX_MEDIA_SURFACES_GEN6 surface-state slots. */
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define MFC_SOFTWARE_HASWELL    1

/* Lowest revision number treated as "B0 or later". */
#define B0_STEP_REV             2
/* Non-zero when the device revision is at least B0_STEP_REV.  `i965` may be
 * any expression yielding a pointer (e.g. `&dev`). */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
54
55 static const uint32_t gen8_mfc_batchbuffer_avc_intra[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
57 };
58
59 static const uint32_t gen8_mfc_batchbuffer_avc_inter[][4] = {
60 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
61 };
62
63 static struct i965_kernel gen8_mfc_kernels[] = {
64     {
65         "MFC AVC INTRA BATCHBUFFER ",
66         MFC_BATCHBUFFER_AVC_INTRA,
67         gen8_mfc_batchbuffer_avc_intra,
68         sizeof(gen8_mfc_batchbuffer_avc_intra),
69         NULL
70     },
71
72     {
73         "MFC AVC INTER BATCHBUFFER ",
74         MFC_BATCHBUFFER_AVC_INTER,
75         gen8_mfc_batchbuffer_avc_inter,
76         sizeof(gen8_mfc_batchbuffer_avc_inter),
77         NULL
78     },
79 };
80
/* Inter mode field: mask and the partition values it can hold
 * (NOTE(review): presumably applied to VME output words -- confirm at the
 * use sites, which are outside this part of the file). */
#define INTER_MODE_MASK     0x03
#define INTER_8X8           0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
/* Mask selecting the sub-macroblock shape bits (bits 8..15). */
#define SUBMB_SHAPE_MASK    0x00FF00

/* Values placed at bit 20 and up. */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
89
90
91 static void
92 gen8_mfc_pipe_mode_select(VADriverContextP ctx,
93                           int standard_select,
94                           struct intel_encoder_context *encoder_context)
95 {
96     struct intel_batchbuffer *batch = encoder_context->base.batch;
97     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
98
99     assert(standard_select == MFX_FORMAT_MPEG2 ||
100            standard_select == MFX_FORMAT_AVC);
101
102     BEGIN_BCS_BATCH(batch, 5);
103
104     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
105     OUT_BCS_BATCH(batch,
106                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
107                   (MFD_MODE_VLD << 15) | /* VLD mode */
108                   (0 << 10) | /* Stream-Out Enable */
109                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
110                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
111                   (0 << 5)  | /* not in stitch mode */
112                   (1 << 4)  | /* encoding mode */
113                   (standard_select << 0));  /* standard select: avc or mpeg2 */
114     OUT_BCS_BATCH(batch,
115                   (0 << 7)  | /* expand NOA bus flag */
116                   (0 << 6)  | /* disable slice-level clock gating */
117                   (0 << 5)  | /* disable clock gating for NOA */
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0);
124     OUT_BCS_BATCH(batch, 0);
125
126     ADVANCE_BCS_BATCH(batch);
127 }
128
129 static void
130 gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
131 {
132     struct intel_batchbuffer *batch = encoder_context->base.batch;
133     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
134
135     BEGIN_BCS_BATCH(batch, 6);
136
137     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
138     OUT_BCS_BATCH(batch, 0);
139     OUT_BCS_BATCH(batch,
140                   ((mfc_context->surface_state.height - 1) << 18) |
141                   ((mfc_context->surface_state.width - 1) << 4));
142     OUT_BCS_BATCH(batch,
143                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
144                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
145                   (0 << 22) | /* surface object control state, FIXME??? */
146                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
147                   (0 << 2)  | /* must be 0 for interleave U/V */
148                   (1 << 1)  | /* must be tiled */
149                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
150     OUT_BCS_BATCH(batch,
151                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
152                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
153     OUT_BCS_BATCH(batch, 0);
154
155     ADVANCE_BCS_BATCH(batch);
156 }
157
158 static void
159 gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
160                                  struct intel_encoder_context *encoder_context)
161 {
162     struct intel_batchbuffer *batch = encoder_context->base.batch;
163     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
164     struct gen6_vme_context *vme_context = encoder_context->vme_context;
165     int vme_size;
166
167     BEGIN_BCS_BATCH(batch, 26);
168
169     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
170     /* the DW1-3 is for the MFX indirect bistream offset */
171     OUT_BCS_BATCH(batch, 0);
172     OUT_BCS_BATCH(batch, 0);
173     OUT_BCS_BATCH(batch, 0);
174     /* the DW4-5 is the MFX upper bound */
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177
178     vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
179     /* the DW6-10 is for MFX Indirect MV Object Base Address */
180     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
184     OUT_BCS_BATCH(batch, 0);
185
186     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198     OUT_BCS_BATCH(batch, 0);
199
200     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   0);
205     OUT_BCS_BATCH(batch, 0);
206     OUT_BCS_BATCH(batch, 0);
207         
208     OUT_BCS_RELOC(batch,
209                   mfc_context->mfc_indirect_pak_bse_object.bo,
210                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
211                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
212     OUT_BCS_BATCH(batch, 0);
213
214     ADVANCE_BCS_BATCH(batch);
215 }
216
217 static void
218 gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
219                        struct intel_encoder_context *encoder_context)
220 {
221     struct intel_batchbuffer *batch = encoder_context->base.batch;
222     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
223     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
224
225     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
226     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
227
228     BEGIN_BCS_BATCH(batch, 16);
229
230     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
231     /*DW1. MB setting of frame */
232     OUT_BCS_BATCH(batch,
233                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
234     OUT_BCS_BATCH(batch, 
235                   ((height_in_mbs - 1) << 16) | 
236                   ((width_in_mbs - 1) << 0));
237     /* DW3 QP setting */
238     OUT_BCS_BATCH(batch, 
239                   (0 << 24) |   /* Second Chroma QP Offset */
240                   (0 << 16) |   /* Chroma QP Offset */
241                   (0 << 14) |   /* Max-bit conformance Intra flag */
242                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
243                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
244                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
245                   (0 << 8)  |   /* FIXME: Image Structure */
246                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
247     OUT_BCS_BATCH(batch,
248                   (0 << 16) |   /* Mininum Frame size */
249                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
250                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
251                   (0 << 13) |   /* CABAC 0 word insertion test enable */
252                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
253                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
254                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
255                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
256                   (0 << 6)  |   /* Only valid for VLD decoding mode */
257                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
258                   (0 << 4)  |   /* Direct 8x8 inference flag */
259                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
260                   (1 << 2)  |   /* Frame MB only flag */
261                   (0 << 1)  |   /* MBAFF mode is in active */
262                   (0 << 0));    /* Field picture flag */
263     /* DW5 Trellis quantization */
264     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
265     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
266                   (0xBB8 << 16) |       /* InterMbMaxSz */
267                   (0xEE8) );            /* IntraMbMaxSz */
268     OUT_BCS_BATCH(batch, 0);            /* Reserved */
269     /* DW8. QP delta */
270     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
271     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
272     /* DW10. Bit setting for MB */
273     OUT_BCS_BATCH(batch, 0x8C000000);
274     OUT_BCS_BATCH(batch, 0x00010000);
275     /* DW12. */
276     OUT_BCS_BATCH(batch, 0);
277     OUT_BCS_BATCH(batch, 0x02010100);
278     /* DW14. For short format */
279     OUT_BCS_BATCH(batch, 0);
280     OUT_BCS_BATCH(batch, 0);
281
282     ADVANCE_BCS_BATCH(batch);
283 }
284
285 static void
286 gen8_mfc_qm_state(VADriverContextP ctx,
287                   int qm_type,
288                   unsigned int *qm,
289                   int qm_length,
290                   struct intel_encoder_context *encoder_context)
291 {
292     struct intel_batchbuffer *batch = encoder_context->base.batch;
293     unsigned int qm_buffer[16];
294
295     assert(qm_length <= 16);
296     assert(sizeof(*qm) == 4);
297     memcpy(qm_buffer, qm, qm_length * 4);
298
299     BEGIN_BCS_BATCH(batch, 18);
300     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
301     OUT_BCS_BATCH(batch, qm_type << 0);
302     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
303     ADVANCE_BCS_BATCH(batch);
304 }
305
306 static void
307 gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
308 {
309     unsigned int qm[16] = {
310         0x10101010, 0x10101010, 0x10101010, 0x10101010,
311         0x10101010, 0x10101010, 0x10101010, 0x10101010,
312         0x10101010, 0x10101010, 0x10101010, 0x10101010,
313         0x10101010, 0x10101010, 0x10101010, 0x10101010
314     };
315
316     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
317     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
318     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
319     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
320 }
321
322 static void
323 gen8_mfc_fqm_state(VADriverContextP ctx,
324                    int fqm_type,
325                    unsigned int *fqm,
326                    int fqm_length,
327                    struct intel_encoder_context *encoder_context)
328 {
329     struct intel_batchbuffer *batch = encoder_context->base.batch;
330     unsigned int fqm_buffer[32];
331
332     assert(fqm_length <= 32);
333     assert(sizeof(*fqm) == 4);
334     memcpy(fqm_buffer, fqm, fqm_length * 4);
335
336     BEGIN_BCS_BATCH(batch, 34);
337     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
338     OUT_BCS_BATCH(batch, fqm_type << 0);
339     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
340     ADVANCE_BCS_BATCH(batch);
341 }
342
343 static void
344 gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
345 {
346     unsigned int qm[32] = {
347         0x10001000, 0x10001000, 0x10001000, 0x10001000,
348         0x10001000, 0x10001000, 0x10001000, 0x10001000,
349         0x10001000, 0x10001000, 0x10001000, 0x10001000,
350         0x10001000, 0x10001000, 0x10001000, 0x10001000,
351         0x10001000, 0x10001000, 0x10001000, 0x10001000,
352         0x10001000, 0x10001000, 0x10001000, 0x10001000,
353         0x10001000, 0x10001000, 0x10001000, 0x10001000,
354         0x10001000, 0x10001000, 0x10001000, 0x10001000
355     };
356
357     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
358     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
359     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
360     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
361 }
362
363 static void
364 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
365                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
366                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
367                            struct intel_batchbuffer *batch)
368 {
369     if (batch == NULL)
370         batch = encoder_context->base.batch;
371
372     if (data_bits_in_last_dw == 0)
373         data_bits_in_last_dw = 32;
374
375     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
376
377     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
378     OUT_BCS_BATCH(batch,
379                   (0 << 16) |   /* always start at offset 0 */
380                   (data_bits_in_last_dw << 8) |
381                   (skip_emul_byte_count << 4) |
382                   (!!emulation_flag << 3) |
383                   ((!!is_last_header) << 2) |
384                   ((!!is_end_of_slice) << 1) |
385                   (0 << 0));    /* FIXME: ??? */
386     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
387
388     ADVANCE_BCS_BATCH(batch);
389 }
390
391
392 static void gen8_mfc_init(VADriverContextP ctx,
393                           struct encode_state *encode_state,
394                           struct intel_encoder_context *encoder_context)
395 {
396     struct i965_driver_data *i965 = i965_driver_data(ctx);
397     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
398     dri_bo *bo;
399     int i;
400     int width_in_mbs = 0;
401     int height_in_mbs = 0;
402     int slice_batchbuffer_size;
403
404     if (encoder_context->codec == CODEC_H264) {
405         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
406         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
407         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
408     } else {
409         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
410
411         assert(encoder_context->codec == CODEC_MPEG2);
412
413         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
414         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
415     }
416
417     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
418                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
419
420     /*Encode common setup for MFC*/
421     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
422     mfc_context->post_deblocking_output.bo = NULL;
423
424     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
425     mfc_context->pre_deblocking_output.bo = NULL;
426
427     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
428     mfc_context->uncompressed_picture_source.bo = NULL;
429
430     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
431     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
432
433     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
434         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
435         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
436         mfc_context->direct_mv_buffers[i].bo = NULL;
437     }
438
439     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
440         if (mfc_context->reference_surfaces[i].bo != NULL)
441             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
442         mfc_context->reference_surfaces[i].bo = NULL;  
443     }
444
445     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
446     bo = dri_bo_alloc(i965->intel.bufmgr,
447                       "Buffer",
448                       width_in_mbs * 64,
449                       64);
450     assert(bo);
451     mfc_context->intra_row_store_scratch_buffer.bo = bo;
452
453     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
454     bo = dri_bo_alloc(i965->intel.bufmgr,
455                       "Buffer",
456                       width_in_mbs * height_in_mbs * 16,
457                       64);
458     assert(bo);
459     mfc_context->macroblock_status_buffer.bo = bo;
460
461     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
462     bo = dri_bo_alloc(i965->intel.bufmgr,
463                       "Buffer",
464                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
465                       64);
466     assert(bo);
467     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
468
469     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
470     bo = dri_bo_alloc(i965->intel.bufmgr,
471                       "Buffer",
472                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
473                       0x1000);
474     assert(bo);
475     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
476
477     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
478     mfc_context->mfc_batchbuffer_surface.bo = NULL;
479
480     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
481     mfc_context->aux_batchbuffer_surface.bo = NULL;
482
483     if (mfc_context->aux_batchbuffer)
484         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
485
486     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
487     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
488     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
489     mfc_context->aux_batchbuffer_surface.pitch = 16;
490     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
491     mfc_context->aux_batchbuffer_surface.size_block = 16;
492
493     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
494 }
495
496 static void
497 gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
498                              struct intel_encoder_context *encoder_context)
499 {
500     struct intel_batchbuffer *batch = encoder_context->base.batch;
501     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
502     int i;
503
504     BEGIN_BCS_BATCH(batch, 61);
505
506     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
507
508     /* the DW1-3 is for pre_deblocking */
509     if (mfc_context->pre_deblocking_output.bo)
510         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
511                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
512                       0);
513     else
514         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
515
516     OUT_BCS_BATCH(batch, 0);
517     OUT_BCS_BATCH(batch, 0);
518     /* the DW4-6 is for the post_deblocking */
519
520     if (mfc_context->post_deblocking_output.bo)
521         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
522                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
523                       0);                                                                                       /* post output addr  */ 
524     else
525         OUT_BCS_BATCH(batch, 0);
526     
527     OUT_BCS_BATCH(batch, 0);
528     OUT_BCS_BATCH(batch, 0);
529
530     /* the DW7-9 is for the uncompressed_picture */
531     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
532                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
533                   0); /* uncompressed data */
534
535     OUT_BCS_BATCH(batch, 0);
536     OUT_BCS_BATCH(batch, 0);
537
538     /* the DW10-12 is for the mb status */
539     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
540                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
541                   0); /* StreamOut data*/
542     
543     OUT_BCS_BATCH(batch, 0);
544     OUT_BCS_BATCH(batch, 0);
545
546     /* the DW13-15 is for the intra_row_store_scratch */
547     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
548                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
549                   0);   
550
551     OUT_BCS_BATCH(batch, 0);
552     OUT_BCS_BATCH(batch, 0);
553
554     /* the DW16-18 is for the deblocking filter */
555     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
556                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
557                   0);
558
559     OUT_BCS_BATCH(batch, 0);
560     OUT_BCS_BATCH(batch, 0);
561
562     /* the DW 19-50 is for Reference pictures*/
563     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
564         if ( mfc_context->reference_surfaces[i].bo != NULL) {
565             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
566                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
567                           0);                   
568         } else {
569             OUT_BCS_BATCH(batch, 0);
570         }
571
572         OUT_BCS_BATCH(batch, 0);
573     }
574
575     OUT_BCS_BATCH(batch, 0);
576
577     /* The DW 52-54 is for the MB status buffer */
578     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
579                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
580                   0);                                                                                   /* Macroblock status buffer*/
581         
582     OUT_BCS_BATCH(batch, 0);
583     OUT_BCS_BATCH(batch, 0);
584
585     /* the DW 55-57 is the ILDB buffer */
586     OUT_BCS_BATCH(batch, 0);
587     OUT_BCS_BATCH(batch, 0);
588     OUT_BCS_BATCH(batch, 0);
589
590     /* the DW 58-60 is the second ILDB buffer */
591     OUT_BCS_BATCH(batch, 0);
592     OUT_BCS_BATCH(batch, 0);
593     OUT_BCS_BATCH(batch, 0);
594
595     ADVANCE_BCS_BATCH(batch);
596 }
597
598 static void
599 gen8_mfc_avc_directmode_state(VADriverContextP ctx,
600                               struct intel_encoder_context *encoder_context)
601 {
602     struct intel_batchbuffer *batch = encoder_context->base.batch;
603     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
604
605     int i;
606
607     BEGIN_BCS_BATCH(batch, 71);
608
609     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
610
611     /* Reference frames and Current frames */
612     /* the DW1-32 is for the direct MV for reference */
613     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
614         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
615             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
616                           I915_GEM_DOMAIN_INSTRUCTION, 0,
617                           0);
618             OUT_BCS_BATCH(batch, 0);
619         } else {
620             OUT_BCS_BATCH(batch, 0);
621             OUT_BCS_BATCH(batch, 0);
622         }
623     }
624     
625     OUT_BCS_BATCH(batch, 0);
626
627     /* the DW34-36 is the MV for the current reference */
628     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
629                   I915_GEM_DOMAIN_INSTRUCTION, 0,
630                   0);
631
632     OUT_BCS_BATCH(batch, 0);
633     OUT_BCS_BATCH(batch, 0);
634
635     /* POL list */
636     for(i = 0; i < 32; i++) {
637         OUT_BCS_BATCH(batch, i/2);
638     }
639     OUT_BCS_BATCH(batch, 0);
640     OUT_BCS_BATCH(batch, 0);
641
642     ADVANCE_BCS_BATCH(batch);
643 }
644
645
646 static void
647 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
648                                  struct intel_encoder_context *encoder_context)
649 {
650     struct intel_batchbuffer *batch = encoder_context->base.batch;
651     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
652
653     BEGIN_BCS_BATCH(batch, 10);
654
655     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
656     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
657                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
658                   0);
659     OUT_BCS_BATCH(batch, 0);
660     OUT_BCS_BATCH(batch, 0);
661         
662     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
663     OUT_BCS_BATCH(batch, 0);
664     OUT_BCS_BATCH(batch, 0);
665     OUT_BCS_BATCH(batch, 0);
666
667     /* the DW7-9 is for Bitplane Read Buffer Base Address */
668     OUT_BCS_BATCH(batch, 0);
669     OUT_BCS_BATCH(batch, 0);
670     OUT_BCS_BATCH(batch, 0);
671
672     ADVANCE_BCS_BATCH(batch);
673 }
674
675
676 static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
677                                                       struct encode_state *encode_state,
678                                                       struct intel_encoder_context *encoder_context)
679 {
680     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
681
682     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
683     mfc_context->set_surface_state(ctx, encoder_context);
684     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
685     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
686     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
687     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
688     mfc_context->avc_qm_state(ctx, encoder_context);
689     mfc_context->avc_fqm_state(ctx, encoder_context);
690     gen8_mfc_avc_directmode_state(ctx, encoder_context); 
691     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
692 }
693
694
695 static VAStatus gen8_mfc_run(VADriverContextP ctx, 
696                              struct encode_state *encode_state,
697                              struct intel_encoder_context *encoder_context)
698 {
699     struct intel_batchbuffer *batch = encoder_context->base.batch;
700
701     intel_batchbuffer_flush(batch);             //run the pipeline
702
703     return VA_STATUS_SUCCESS;
704 }
705
706
707 static VAStatus
708 gen8_mfc_stop(VADriverContextP ctx, 
709               struct encode_state *encode_state,
710               struct intel_encoder_context *encoder_context,
711               int *encoded_bits_size)
712 {
713     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
714     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
715     VACodedBufferSegment *coded_buffer_segment;
716     
717     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
718     assert(vaStatus == VA_STATUS_SUCCESS);
719     *encoded_bits_size = coded_buffer_segment->size * 8;
720     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
721
722     return VA_STATUS_SUCCESS;
723 }
724
725
726 static void
727 gen8_mfc_avc_slice_state(VADriverContextP ctx,
728                          VAEncPictureParameterBufferH264 *pic_param,
729                          VAEncSliceParameterBufferH264 *slice_param,
730                          struct encode_state *encode_state,
731                          struct intel_encoder_context *encoder_context,
732                          int rate_control_enable,
733                          int qp,
734                          struct intel_batchbuffer *batch)
735 {
736     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
737     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
738     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
739     int beginmb = slice_param->macroblock_address;
740     int endmb = beginmb + slice_param->num_macroblocks;
741     int beginx = beginmb % width_in_mbs;
742     int beginy = beginmb / width_in_mbs;
743     int nextx =  endmb % width_in_mbs;
744     int nexty = endmb / width_in_mbs;
745     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
746     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
747     int maxQpN, maxQpP;
748     unsigned char correct[6], grow, shrink;
749     int i;
750     int weighted_pred_idc = 0;
751     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
752     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
753     int num_ref_l0 = 0, num_ref_l1 = 0;
754
755     if (batch == NULL)
756         batch = encoder_context->base.batch;
757
758     if (slice_type == SLICE_TYPE_I) {
759         luma_log2_weight_denom = 0;
760         chroma_log2_weight_denom = 0;
761     } else if (slice_type == SLICE_TYPE_P) {
762         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
763         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
764
765         if (slice_param->num_ref_idx_active_override_flag)
766             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
767     } else if (slice_type == SLICE_TYPE_B) {
768         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
769         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
770         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
771
772         if (slice_param->num_ref_idx_active_override_flag) {
773             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
774             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
775         }
776
777         if (weighted_pred_idc == 2) {
778             /* 8.4.3 - Derivation process for prediction weights (8-279) */
779             luma_log2_weight_denom = 5;
780             chroma_log2_weight_denom = 5;
781         }
782     }
783
784     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
785     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
786
787     for (i = 0; i < 6; i++)
788         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
789
790     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
791         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
792     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
793         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
794
795     BEGIN_BCS_BATCH(batch, 11);;
796
797     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
798     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
799
800     OUT_BCS_BATCH(batch,
801                   (num_ref_l0 << 16) |
802                   (num_ref_l1 << 24) |
803                   (chroma_log2_weight_denom << 8) |
804                   (luma_log2_weight_denom << 0));
805
806     OUT_BCS_BATCH(batch, 
807                   (weighted_pred_idc << 30) |
808                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
809                   (slice_param->disable_deblocking_filter_idc << 27) |
810                   (slice_param->cabac_init_idc << 24) |
811                   (qp<<16) |                    /*Slice Quantization Parameter*/
812                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
813                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
814     OUT_BCS_BATCH(batch,
815                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
816                   (beginx << 16) |
817                   slice_param->macroblock_address );
818     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
819     OUT_BCS_BATCH(batch, 
820                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
821                   (1 << 30) |           /*ResetRateControlCounter*/
822                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
823                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
824                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
825                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
826                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
827                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
828                   (last_slice << 19) |     /*IsLastSlice*/
829                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
830                   (1 << 17) |       /*HeaderPresentFlag*/       
831                   (1 << 16) |       /*SliceData PresentFlag*/
832                   (1 << 15) |       /*TailPresentFlag*/
833                   (1 << 13) |       /*RBSP NAL TYPE*/   
834                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
835     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
836     OUT_BCS_BATCH(batch,
837                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
838                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
839                   (shrink << 8)  |
840                   (grow << 0));   
841     OUT_BCS_BATCH(batch,
842                   (correct[5] << 20) |
843                   (correct[4] << 16) |
844                   (correct[3] << 12) |
845                   (correct[2] << 8) |
846                   (correct[1] << 4) |
847                   (correct[0] << 0));
848     OUT_BCS_BATCH(batch, 0);
849
850     ADVANCE_BCS_BATCH(batch);
851 }
852
853
854 #ifdef MFC_SOFTWARE_HASWELL
855
856 static int
857 gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
858                               int qp,unsigned int *msg,
859                               struct intel_encoder_context *encoder_context,
860                               unsigned char target_mb_size, unsigned char max_mb_size,
861                               struct intel_batchbuffer *batch)
862 {
863     int len_in_dwords = 12;
864     unsigned int intra_msg;
865 #define         INTRA_MSG_FLAG          (1 << 13)
866 #define         INTRA_MBTYPE_MASK       (0x1F0000)
867     if (batch == NULL)
868         batch = encoder_context->base.batch;
869
870     BEGIN_BCS_BATCH(batch, len_in_dwords);
871
872     intra_msg = msg[0] & 0xC0FF;
873     intra_msg |= INTRA_MSG_FLAG;
874     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
875     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
876     OUT_BCS_BATCH(batch, 0);
877     OUT_BCS_BATCH(batch, 0);
878     OUT_BCS_BATCH(batch, 
879                   (0 << 24) |           /* PackedMvNum, Debug*/
880                   (0 << 20) |           /* No motion vector */
881                   (1 << 19) |           /* CbpDcY */
882                   (1 << 18) |           /* CbpDcU */
883                   (1 << 17) |           /* CbpDcV */
884                   intra_msg);
885
886     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
887     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
888     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
889
890     /*Stuff for Intra MB*/
891     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
892     OUT_BCS_BATCH(batch, msg[2]);       
893     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
894     
895     /*MaxSizeInWord and TargetSzieInWord*/
896     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
897                   (target_mb_size << 16) );
898
899     OUT_BCS_BATCH(batch, 0);
900
901     ADVANCE_BCS_BATCH(batch);
902
903     return len_in_dwords;
904 }
905
906 static int
907 gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
908                               unsigned int *msg, unsigned int offset,
909                               struct intel_encoder_context *encoder_context,
910                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
911                               struct intel_batchbuffer *batch)
912 {
913     struct gen6_vme_context *vme_context = encoder_context->vme_context;
914     int len_in_dwords = 12;
915     unsigned int inter_msg = 0;
916     if (batch == NULL)
917         batch = encoder_context->base.batch;
918     {
919 #define MSG_MV_OFFSET   4
920         unsigned int *mv_ptr;
921         mv_ptr = msg + MSG_MV_OFFSET;
922         /* MV of VME output is based on 16 sub-blocks. So it is necessary
923          * to convert them to be compatible with the format of AVC_PAK
924          * command.
925          */
926         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
927             /* MV[0] and MV[2] are replicated */
928             mv_ptr[4] = mv_ptr[0];
929             mv_ptr[5] = mv_ptr[1];
930             mv_ptr[2] = mv_ptr[8];
931             mv_ptr[3] = mv_ptr[9];
932             mv_ptr[6] = mv_ptr[8];
933             mv_ptr[7] = mv_ptr[9];
934         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
935             /* MV[0] and MV[1] are replicated */
936             mv_ptr[2] = mv_ptr[0];
937             mv_ptr[3] = mv_ptr[1];
938             mv_ptr[4] = mv_ptr[16];
939             mv_ptr[5] = mv_ptr[17];
940             mv_ptr[6] = mv_ptr[24];
941             mv_ptr[7] = mv_ptr[25];
942         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
943                    !(msg[1] & SUBMB_SHAPE_MASK)) {
944             /* Don't touch MV[0] or MV[1] */
945             mv_ptr[2] = mv_ptr[8];
946             mv_ptr[3] = mv_ptr[9];
947             mv_ptr[4] = mv_ptr[16];
948             mv_ptr[5] = mv_ptr[17];
949             mv_ptr[6] = mv_ptr[24];
950             mv_ptr[7] = mv_ptr[25];
951         }
952     }
953
954     BEGIN_BCS_BATCH(batch, len_in_dwords);
955
956     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
957
958     inter_msg = 32;
959     /* MV quantity */
960     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
961         if (msg[1] & SUBMB_SHAPE_MASK)
962             inter_msg = 128;
963     }
964     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
965     OUT_BCS_BATCH(batch, offset);
966     inter_msg = msg[0] & (0x1F00FFFF);
967     inter_msg |= INTER_MV8;
968     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
969     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
970         (msg[1] & SUBMB_SHAPE_MASK)) {
971         inter_msg |= INTER_MV32;
972     }
973
974     OUT_BCS_BATCH(batch, inter_msg);
975
976     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
977     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
978 #if 0 
979     if ( slice_type == SLICE_TYPE_B) {
980         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
981     } else {
982         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
983     }
984 #else
985     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
986 #endif
987
988     inter_msg = msg[1] >> 8;
989     /*Stuff for Inter MB*/
990     OUT_BCS_BATCH(batch, inter_msg);        
991     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
992     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
993
994     /*MaxSizeInWord and TargetSzieInWord*/
995     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
996                   (target_mb_size << 16) );
997
998     OUT_BCS_BATCH(batch, 0x0);    
999
1000     ADVANCE_BCS_BATCH(batch);
1001
1002     return len_in_dwords;
1003 }
1004
1005 #define         AVC_INTRA_RDO_OFFSET    4
1006 #define         AVC_INTER_RDO_OFFSET    10
1007 #define         AVC_INTER_MSG_OFFSET    8       
1008 #define         AVC_INTER_MV_OFFSET             48
1009 #define         AVC_RDO_MASK            0xFFFF
1010
1011 static void 
1012 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1013                                        struct encode_state *encode_state,
1014                                        struct intel_encoder_context *encoder_context,
1015                                        int slice_index,
1016                                        struct intel_batchbuffer *slice_batch)
1017 {
1018     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1019     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1020     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1021     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1022     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1023     unsigned int *msg = NULL, offset = 0;
1024     unsigned char *msg_ptr = NULL;
1025     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1026     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1027     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1028     int i,x,y;
1029     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1030     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1031     unsigned char *slice_header = NULL;
1032     int slice_header_length_in_bits = 0;
1033     unsigned int tail_data[] = { 0x0, 0x0 };
1034     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1035     int is_intra = slice_type == SLICE_TYPE_I;
1036
1037
1038     if (rate_control_mode == VA_RC_CBR) {
1039         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1040         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1041     }
1042
1043     /* only support for 8-bit pixel bit-depth */
1044     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1045     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1046     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1047     assert(qp >= 0 && qp < 52);
1048
1049     gen8_mfc_avc_slice_state(ctx, 
1050                              pPicParameter,
1051                              pSliceParameter,
1052                              encode_state, encoder_context,
1053                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1054
1055     if ( slice_index == 0) 
1056         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1057
1058     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1059
1060     // slice hander
1061     mfc_context->insert_object(ctx, encoder_context,
1062                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1063                                5,  /* first 5 bytes are start code + nal unit type */
1064                                1, 0, 1, slice_batch);
1065
1066     dri_bo_map(vme_context->vme_output.bo , 1);
1067     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1068
1069     if (is_intra) {
1070         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1071     } else {
1072         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1073     }
1074    
1075     for (i = pSliceParameter->macroblock_address; 
1076          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1077         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1078         x = i % width_in_mbs;
1079         y = i / width_in_mbs;
1080         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1081
1082         if (is_intra) {
1083             assert(msg);
1084             gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1085         } else {
1086             int inter_rdo, intra_rdo;
1087             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1088             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1089             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1090             if (intra_rdo < inter_rdo) { 
1091                 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1092             } else {
1093                 msg += AVC_INTER_MSG_OFFSET;
1094                 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1095             }
1096         }
1097     }
1098    
1099     dri_bo_unmap(vme_context->vme_output.bo);
1100
1101     if ( last_slice ) {    
1102         mfc_context->insert_object(ctx, encoder_context,
1103                                    tail_data, 2, 8,
1104                                    2, 1, 1, 0, slice_batch);
1105     } else {
1106         mfc_context->insert_object(ctx, encoder_context,
1107                                    tail_data, 1, 8,
1108                                    1, 1, 1, 0, slice_batch);
1109     }
1110
1111     free(slice_header);
1112
1113 }
1114
1115 static dri_bo *
1116 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1117                                   struct encode_state *encode_state,
1118                                   struct intel_encoder_context *encoder_context)
1119 {
1120     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1121     struct i965_driver_data *i965 = i965_driver_data(ctx);
1122     struct intel_batchbuffer *batch;
1123     dri_bo *batch_bo;
1124     int i;
1125
1126     batch = mfc_context->aux_batchbuffer;
1127     batch_bo = batch->buffer;
1128     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1129         gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1130     }
1131
1132     intel_batchbuffer_align(batch, 8);
1133     
1134     BEGIN_BCS_BATCH(batch, 2);
1135     OUT_BCS_BATCH(batch, 0);
1136     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1137     ADVANCE_BCS_BATCH(batch);
1138
1139     dri_bo_reference(batch_bo);
1140     intel_batchbuffer_free(batch);
1141     mfc_context->aux_batchbuffer = NULL;
1142
1143     return batch_bo;
1144 }
1145
1146 #else
1147
1148 static void
1149 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1150                                     struct encode_state *encode_state,
1151                                     struct intel_encoder_context *encoder_context)
1152
1153 {
1154     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1155     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1156
1157     assert(vme_context->vme_output.bo);
1158     mfc_context->buffer_suface_setup(ctx,
1159                                      &mfc_context->gpe_context,
1160                                      &vme_context->vme_output,
1161                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1162                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1163     assert(mfc_context->aux_batchbuffer_surface.bo);
1164     mfc_context->buffer_suface_setup(ctx,
1165                                      &mfc_context->gpe_context,
1166                                      &mfc_context->aux_batchbuffer_surface,
1167                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1168                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1169 }
1170
1171 static void
1172 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1173                                      struct encode_state *encode_state,
1174                                      struct intel_encoder_context *encoder_context)
1175
1176 {
1177     struct i965_driver_data *i965 = i965_driver_data(ctx);
1178     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1179     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1180     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1181     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1182     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1183     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1184     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1185     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1186                                                            "MFC batchbuffer",
1187                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1188                                                            0x1000);
1189     mfc_context->buffer_suface_setup(ctx,
1190                                      &mfc_context->gpe_context,
1191                                      &mfc_context->mfc_batchbuffer_surface,
1192                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1193                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1194 }
1195
1196 static void
1197 gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1198                                     struct encode_state *encode_state,
1199                                     struct intel_encoder_context *encoder_context)
1200 {
1201     gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1202     gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1203 }
1204
1205 static void
1206 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1207                                 struct encode_state *encode_state,
1208                                 struct intel_encoder_context *encoder_context)
1209 {
1210     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1211     struct gen6_interface_descriptor_data *desc;   
1212     int i;
1213     dri_bo *bo;
1214
1215     bo = mfc_context->gpe_context.idrt.bo;
1216     dri_bo_map(bo, 1);
1217     assert(bo->virtual);
1218     desc = bo->virtual;
1219
1220     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1221         struct i965_kernel *kernel;
1222
1223         kernel = &mfc_context->gpe_context.kernels[i];
1224         assert(sizeof(*desc) == 32);
1225
1226         /*Setup the descritor table*/
1227         memset(desc, 0, sizeof(*desc));
1228         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1229         desc->desc2.sampler_count = 0;
1230         desc->desc2.sampler_state_pointer = 0;
1231         desc->desc3.binding_table_entry_count = 2;
1232         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1233         desc->desc4.constant_urb_entry_read_offset = 0;
1234         desc->desc4.constant_urb_entry_read_length = 4;
1235                 
1236         /*kernel start*/
1237         dri_bo_emit_reloc(bo,   
1238                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1239                           0,
1240                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1241                           kernel->bo);
1242         desc++;
1243     }
1244
1245     dri_bo_unmap(bo);
1246 }
1247
1248 static void
1249 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1250                                     struct encode_state *encode_state,
1251                                     struct intel_encoder_context *encoder_context)
1252 {
1253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1254     
1255     (void)mfc_context;
1256 }
1257
1258 static void
1259 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1260                                          int index,
1261                                          int head_offset,
1262                                          int batchbuffer_offset,
1263                                          int head_size,
1264                                          int tail_size,
1265                                          int number_mb_cmds,
1266                                          int first_object,
1267                                          int last_object,
1268                                          int last_slice,
1269                                          int mb_x,
1270                                          int mb_y,
1271                                          int width_in_mbs,
1272                                          int qp)
1273 {
1274     BEGIN_BATCH(batch, 12);
1275     
1276     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1277     OUT_BATCH(batch, index);
1278     OUT_BATCH(batch, 0);
1279     OUT_BATCH(batch, 0);
1280     OUT_BATCH(batch, 0);
1281     OUT_BATCH(batch, 0);
1282    
1283     /*inline data */
1284     OUT_BATCH(batch, head_offset);
1285     OUT_BATCH(batch, batchbuffer_offset);
1286     OUT_BATCH(batch, 
1287               head_size << 16 |
1288               tail_size);
1289     OUT_BATCH(batch,
1290               number_mb_cmds << 16 |
1291               first_object << 2 |
1292               last_object << 1 |
1293               last_slice);
1294     OUT_BATCH(batch,
1295               mb_y << 8 |
1296               mb_x);
1297     OUT_BATCH(batch,
1298               qp << 16 |
1299               width_in_mbs);
1300
1301     ADVANCE_BATCH(batch);
1302 }
1303
1304 static void
1305 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1306                                        struct intel_encoder_context *encoder_context,
1307                                        VAEncSliceParameterBufferH264 *slice_param,
1308                                        int head_offset,
1309                                        unsigned short head_size,
1310                                        unsigned short tail_size,
1311                                        int batchbuffer_offset,
1312                                        int qp,
1313                                        int last_slice)
1314 {
1315     struct intel_batchbuffer *batch = encoder_context->base.batch;
1316     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1317     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1318     int total_mbs = slice_param->num_macroblocks;
1319     int number_mb_cmds = 128;
1320     int starting_mb = 0;
1321     int last_object = 0;
1322     int first_object = 1;
1323     int i;
1324     int mb_x, mb_y;
1325     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1326
1327     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1328         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1329         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1330         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1331         assert(mb_x <= 255 && mb_y <= 255);
1332
1333         starting_mb += number_mb_cmds;
1334
1335         gen8_mfc_batchbuffer_emit_object_command(batch,
1336                                                  index,
1337                                                  head_offset,
1338                                                  batchbuffer_offset,
1339                                                  head_size,
1340                                                  tail_size,
1341                                                  number_mb_cmds,
1342                                                  first_object,
1343                                                  last_object,
1344                                                  last_slice,
1345                                                  mb_x,
1346                                                  mb_y,
1347                                                  width_in_mbs,
1348                                                  qp);
1349
1350         if (first_object) {
1351             head_offset += head_size;
1352             batchbuffer_offset += head_size;
1353         }
1354
1355         if (last_object) {
1356             head_offset += tail_size;
1357             batchbuffer_offset += tail_size;
1358         }
1359
1360         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1361
1362         first_object = 0;
1363     }
1364
1365     if (!last_object) {
1366         last_object = 1;
1367         number_mb_cmds = total_mbs % number_mb_cmds;
1368         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1369         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1370         assert(mb_x <= 255 && mb_y <= 255);
1371         starting_mb += number_mb_cmds;
1372
1373         gen8_mfc_batchbuffer_emit_object_command(batch,
1374                                                  index,
1375                                                  head_offset,
1376                                                  batchbuffer_offset,
1377                                                  head_size,
1378                                                  tail_size,
1379                                                  number_mb_cmds,
1380                                                  first_object,
1381                                                  last_object,
1382                                                  last_slice,
1383                                                  mb_x,
1384                                                  mb_y,
1385                                                  width_in_mbs,
1386                                                  qp);
1387     }
1388 }
1389                           
1390 /*
1391  * return size in Owords (16bytes)
1392  */         
1393 static int
1394 gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1395                                struct encode_state *encode_state,
1396                                struct intel_encoder_context *encoder_context,
1397                                int slice_index,
1398                                int batchbuffer_offset)
1399 {
1400     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1401     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1402     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1403     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1404     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1405     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1406     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1407     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1408     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1409     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1410     unsigned char *slice_header = NULL;
1411     int slice_header_length_in_bits = 0;
1412     unsigned int tail_data[] = { 0x0, 0x0 };
1413     long head_offset;
1414     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1415     unsigned short head_size, tail_size;
1416     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1417
1418     if (rate_control_mode == VA_RC_CBR) {
1419         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1420         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1421     }
1422
1423     /* only support for 8-bit pixel bit-depth */
1424     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1425     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1426     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1427     assert(qp >= 0 && qp < 52);
1428
1429     head_offset = old_used / 16;
1430     gen8_mfc_avc_slice_state(ctx,
1431                              pPicParameter,
1432                              pSliceParameter,
1433                              encode_state,
1434                              encoder_context,
1435                              (rate_control_mode == VA_RC_CBR),
1436                              qp,
1437                              slice_batch);
1438
1439     if (slice_index == 0)
1440         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1441
1442     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1443
1444     // slice hander
1445     mfc_context->insert_object(ctx,
1446                                encoder_context,
1447                                (unsigned int *)slice_header,
1448                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1449                                slice_header_length_in_bits & 0x1f,
1450                                5,  /* first 5 bytes are start code + nal unit type */
1451                                1,
1452                                0,
1453                                1,
1454                                slice_batch);
1455     free(slice_header);
1456
1457     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1458     used = intel_batchbuffer_used_size(slice_batch);
1459     head_size = (used - old_used) / 16;
1460     old_used = used;
1461
1462     /* tail */
1463     if (last_slice) {    
1464         mfc_context->insert_object(ctx,
1465                                    encoder_context,
1466                                    tail_data,
1467                                    2,
1468                                    8,
1469                                    2,
1470                                    1,
1471                                    1,
1472                                    0,
1473                                    slice_batch);
1474     } else {
1475         mfc_context->insert_object(ctx,
1476                                    encoder_context,
1477                                    tail_data,
1478                                    1,
1479                                    8,
1480                                    1,
1481                                    1,
1482                                    1,
1483                                    0,
1484                                    slice_batch);
1485     }
1486
1487     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1488     used = intel_batchbuffer_used_size(slice_batch);
1489     tail_size = (used - old_used) / 16;
1490
1491    
1492     gen8_mfc_avc_batchbuffer_slice_command(ctx,
1493                                            encoder_context,
1494                                            pSliceParameter,
1495                                            head_offset,
1496                                            head_size,
1497                                            tail_size,
1498                                            batchbuffer_offset,
1499                                            qp,
1500                                            last_slice);
1501
1502     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1503 }
1504
1505 static void
1506 gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1507                                   struct encode_state *encode_state,
1508                                   struct intel_encoder_context *encoder_context)
1509 {
1510     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1511     struct intel_batchbuffer *batch = encoder_context->base.batch;
1512     int i, size, offset = 0;
1513     intel_batchbuffer_start_atomic(batch, 0x4000); 
1514     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1515
1516     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1517         size = gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1518         offset += size;
1519     }
1520
1521     intel_batchbuffer_end_atomic(batch);
1522     intel_batchbuffer_flush(batch);
1523 }
1524
1525 static void
1526 gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1527                                struct encode_state *encode_state,
1528                                struct intel_encoder_context *encoder_context)
1529 {
1530     gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1531     gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1532     gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1533     gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1534 }
1535
1536 static dri_bo *
1537 gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1538                                   struct encode_state *encode_state,
1539                                   struct intel_encoder_context *encoder_context)
1540 {
1541     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1542
1543     gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1544     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1545
1546     return mfc_context->mfc_batchbuffer_surface.bo;
1547 }
1548
1549 #endif
1550
1551 static void
1552 gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
1553                                  struct encode_state *encode_state,
1554                                  struct intel_encoder_context *encoder_context)
1555 {
1556     struct intel_batchbuffer *batch = encoder_context->base.batch;
1557     dri_bo *slice_batch_bo;
1558
1559     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1560         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1561         assert(0);
1562         return; 
1563     }
1564
1565 #ifdef MFC_SOFTWARE_HASWELL
1566     slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1567 #else
1568     slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1569 #endif
1570
1571     // begin programing
1572     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1573     intel_batchbuffer_emit_mi_flush(batch);
1574     
1575     // picture level programing
1576     gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1577
1578     BEGIN_BCS_BATCH(batch, 3);
1579     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1580     OUT_BCS_RELOC(batch,
1581                   slice_batch_bo,
1582                   I915_GEM_DOMAIN_COMMAND, 0, 
1583                   0);
1584     OUT_BCS_BATCH(batch, 0);
1585     ADVANCE_BCS_BATCH(batch);
1586
1587     // end programing
1588     intel_batchbuffer_end_atomic(batch);
1589
1590     dri_bo_unreference(slice_batch_bo);
1591 }
1592
1593
1594 static VAStatus
1595 gen8_mfc_avc_encode_picture(VADriverContextP ctx, 
1596                             struct encode_state *encode_state,
1597                             struct intel_encoder_context *encoder_context)
1598 {
1599     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1600     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1601     int current_frame_bits_size;
1602     int sts;
1603  
1604     for (;;) {
1605         gen8_mfc_init(ctx, encode_state, encoder_context);
1606         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1607         /*Programing bcs pipeline*/
1608         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1609         gen8_mfc_run(ctx, encode_state, encoder_context);
1610         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1611             gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1612             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1613             if (sts == BRC_NO_HRD_VIOLATION) {
1614                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1615                 break;
1616             }
1617             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1618                 if (!mfc_context->hrd.violation_noted) {
1619                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1620                     mfc_context->hrd.violation_noted = 1;
1621                 }
1622                 return VA_STATUS_SUCCESS;
1623             }
1624         } else {
1625             break;
1626         }
1627     }
1628
1629     return VA_STATUS_SUCCESS;
1630 }
1631
1632 /*
1633  * MPEG-2
1634  */
1635
/*
 * Picture coding type written into MFX_MPEG2_PIC_STATE, looked up with the
 * picture_type field of the MPEG-2 picture parameter buffer as the index.
 */
static const int
va_to_gen8_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3     /* B */
};
1642
1643 static void
1644 gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
1645                          struct intel_encoder_context *encoder_context,
1646                          struct encode_state *encode_state)
1647 {
1648     struct intel_batchbuffer *batch = encoder_context->base.batch;
1649     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1650     VAEncPictureParameterBufferMPEG2 *pic_param;
1651     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1652     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1653     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1654
1655     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1656     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1657     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1658
1659     BEGIN_BCS_BATCH(batch, 13);
1660     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1661     OUT_BCS_BATCH(batch,
1662                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1663                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1664                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1665                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1666                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1667                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1668                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1669                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1670                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1671                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1672                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1673                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1674     OUT_BCS_BATCH(batch,
1675                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1676                   va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
1677                   0);
1678     OUT_BCS_BATCH(batch,
1679                   1 << 31 |     /* slice concealment */
1680                   (height_in_mbs - 1) << 16 |
1681                   (width_in_mbs - 1));
1682
1683     if (slice_param && slice_param->quantiser_scale_code >= 14)
1684         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1685     else
1686         OUT_BCS_BATCH(batch, 0);
1687
1688     OUT_BCS_BATCH(batch, 0);
1689     OUT_BCS_BATCH(batch,
1690                   0xFFF << 16 | /* InterMBMaxSize */
1691                   0xFFF << 0 |  /* IntraMBMaxSize */
1692                   0);
1693     OUT_BCS_BATCH(batch, 0);
1694     OUT_BCS_BATCH(batch, 0);
1695     OUT_BCS_BATCH(batch, 0);
1696     OUT_BCS_BATCH(batch, 0);
1697     OUT_BCS_BATCH(batch, 0);
1698     OUT_BCS_BATCH(batch, 0);
1699     ADVANCE_BCS_BATCH(batch);
1700 }
1701
1702 static void
1703 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1704 {
1705     unsigned char intra_qm[64] = {
1706         8, 16, 19, 22, 26, 27, 29, 34,
1707         16, 16, 22, 24, 27, 29, 34, 37,
1708         19, 22, 26, 27, 29, 34, 34, 38,
1709         22, 22, 26, 27, 29, 34, 37, 40,
1710         22, 26, 27, 29, 32, 35, 40, 48,
1711         26, 27, 29, 32, 35, 40, 48, 58,
1712         26, 27, 29, 34, 38, 46, 56, 69,
1713         27, 29, 35, 38, 46, 56, 69, 83
1714     };
1715
1716     unsigned char non_intra_qm[64] = {
1717         16, 16, 16, 16, 16, 16, 16, 16,
1718         16, 16, 16, 16, 16, 16, 16, 16,
1719         16, 16, 16, 16, 16, 16, 16, 16,
1720         16, 16, 16, 16, 16, 16, 16, 16,
1721         16, 16, 16, 16, 16, 16, 16, 16,
1722         16, 16, 16, 16, 16, 16, 16, 16,
1723         16, 16, 16, 16, 16, 16, 16, 16,
1724         16, 16, 16, 16, 16, 16, 16, 16
1725     };
1726
1727     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1728     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1729 }
1730
1731 static void
1732 gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1733 {
1734     unsigned short intra_fqm[64] = {
1735         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1736         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1737         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1738         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1739         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1740         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1741         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1742         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1743     };
1744
1745     unsigned short non_intra_fqm[64] = {
1746         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1747         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1748         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1749         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1750         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1751         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1752         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1753         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1754     };
1755
1756     gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1757     gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1758 }
1759
1760 static void
1761 gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1762                                 struct intel_encoder_context *encoder_context,
1763                                 int x, int y,
1764                                 int next_x, int next_y,
1765                                 int is_fisrt_slice_group,
1766                                 int is_last_slice_group,
1767                                 int intra_slice,
1768                                 int qp,
1769                                 struct intel_batchbuffer *batch)
1770 {
1771     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1772
1773     if (batch == NULL)
1774         batch = encoder_context->base.batch;
1775
1776     BEGIN_BCS_BATCH(batch, 8);
1777
1778     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1779     OUT_BCS_BATCH(batch,
1780                   0 << 31 |                             /* MbRateCtrlFlag */
1781                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1782                   1 << 17 |                             /* Insert Header before the first slice group data */
1783                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1784                   1 << 15 |                             /* TailPresentFlag: always 1 */
1785                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1786                   !!intra_slice << 13 |                 /* IntraSlice */
1787                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1788                   0);
1789     OUT_BCS_BATCH(batch,
1790                   next_y << 24 |
1791                   next_x << 16 |
1792                   y << 8 |
1793                   x << 0 |
1794                   0);
1795     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1796     /* bitstream pointer is only loaded once for the first slice of a frame when 
1797      * LoadSlicePointerFlag is 0
1798      */
1799     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1800     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1801     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1802     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1803
1804     ADVANCE_BCS_BATCH(batch);
1805 }
1806
1807 static int
1808 gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1809                                 struct intel_encoder_context *encoder_context,
1810                                 int x, int y,
1811                                 int first_mb_in_slice,
1812                                 int last_mb_in_slice,
1813                                 int first_mb_in_slice_group,
1814                                 int last_mb_in_slice_group,
1815                                 int mb_type,
1816                                 int qp_scale_code,
1817                                 int coded_block_pattern,
1818                                 unsigned char target_size_in_word,
1819                                 unsigned char max_size_in_word,
1820                                 struct intel_batchbuffer *batch)
1821 {
1822     int len_in_dwords = 9;
1823
1824     if (batch == NULL)
1825         batch = encoder_context->base.batch;
1826
1827     BEGIN_BCS_BATCH(batch, len_in_dwords);
1828
1829     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1830     OUT_BCS_BATCH(batch,
1831                   0 << 24 |     /* PackedMvNum */
1832                   0 << 20 |     /* MvFormat */
1833                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1834                   0 << 15 |     /* TransformFlag: frame DCT */
1835                   0 << 14 |     /* FieldMbFlag */
1836                   1 << 13 |     /* IntraMbFlag */
1837                   mb_type << 8 |   /* MbType: Intra */
1838                   0 << 2 |      /* SkipMbFlag */
1839                   0 << 0 |      /* InterMbMode */
1840                   0);
1841     OUT_BCS_BATCH(batch, y << 16 | x);
1842     OUT_BCS_BATCH(batch,
1843                   max_size_in_word << 24 |
1844                   target_size_in_word << 16 |
1845                   coded_block_pattern << 6 |      /* CBP */
1846                   0);
1847     OUT_BCS_BATCH(batch,
1848                   last_mb_in_slice << 31 |
1849                   first_mb_in_slice << 30 |
1850                   0 << 27 |     /* EnableCoeffClamp */
1851                   last_mb_in_slice_group << 26 |
1852                   0 << 25 |     /* MbSkipConvDisable */
1853                   first_mb_in_slice_group << 24 |
1854                   0 << 16 |     /* MvFieldSelect */
1855                   qp_scale_code << 0 |
1856                   0);
1857     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1858     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1859     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1860     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1861
1862     ADVANCE_BCS_BATCH(batch);
1863
1864     return len_in_dwords;
1865 }
1866
1867 /* Byte offset */
1868 #define MPEG2_INTER_MV_OFFSET   48 
1869
/*
 * Allowed motion vector range for each f_code value; the f_code is the
 * index. Entry 0 is a placeholder and is never used for clamping.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },    /* f_code 0 */
    {   -16,   15 },    /* f_code 1 */
    {   -32,   31 },    /* f_code 2 */
    {   -64,   63 },    /* f_code 3 */
    {  -128,  127 },    /* f_code 4 */
    {  -256,  255 },    /* f_code 5 */
    {  -512,  511 },    /* f_code 6 */
    { -1024, 1023 },    /* f_code 7 */
    { -2048, 2047 },    /* f_code 8 */
    { -4096, 4095 }     /* f_code 9 */
};

/*
 * Sanitise one motion vector component.
 *
 * mv           the component, in half-pixel units
 * pos          macroblock index along the same axis
 * display_max  picture size along that axis, in pixels
 * f_code       selects the allowed range from mv_ranges
 *
 * The vector is reset to 0 when it would move the 16-pixel macroblock
 * before the start or past the end of the picture. For f_code 1..9 the
 * result is then clamped to the f_code's range; any other f_code leaves it
 * unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const struct _mv_ranges *range;
    int before_start = (mv + pos * 16 * 2 < 0);
    int past_end = (mv + (pos + 1) * 16 * 2 > display_max * 2);

    if (before_start || past_end)
        mv = 0;

    if (f_code <= 0 || f_code >= 10)
        return mv;

    range = &mv_ranges[f_code];

    if (mv < range->low)
        return range->low;

    if (mv > range->high)
        return range->high;

    return mv;
}
1904
1905 static int
1906 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1907                                 struct encode_state *encode_state,
1908                                 struct intel_encoder_context *encoder_context,
1909                                 unsigned int *msg,
1910                                 int width_in_mbs, int height_in_mbs,
1911                                 int x, int y,
1912                                 int first_mb_in_slice,
1913                                 int last_mb_in_slice,
1914                                 int first_mb_in_slice_group,
1915                                 int last_mb_in_slice_group,
1916                                 int qp_scale_code,
1917                                 unsigned char target_size_in_word,
1918                                 unsigned char max_size_in_word,
1919                                 struct intel_batchbuffer *batch)
1920 {
1921     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1922     int len_in_dwords = 9;
1923     short *mvptr, mvx0, mvy0, mvx1, mvy1;
1924     
1925     if (batch == NULL)
1926         batch = encoder_context->base.batch;
1927
1928     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
1929     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1930     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1931     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1932     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1933
1934     BEGIN_BCS_BATCH(batch, len_in_dwords);
1935
1936     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1937     OUT_BCS_BATCH(batch,
1938                   2 << 24 |     /* PackedMvNum */
1939                   7 << 20 |     /* MvFormat */
1940                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1941                   0 << 15 |     /* TransformFlag: frame DCT */
1942                   0 << 14 |     /* FieldMbFlag */
1943                   0 << 13 |     /* IntraMbFlag */
1944                   1 << 8 |      /* MbType: Frame-based */
1945                   0 << 2 |      /* SkipMbFlag */
1946                   0 << 0 |      /* InterMbMode */
1947                   0);
1948     OUT_BCS_BATCH(batch, y << 16 | x);
1949     OUT_BCS_BATCH(batch,
1950                   max_size_in_word << 24 |
1951                   target_size_in_word << 16 |
1952                   0x3f << 6 |   /* CBP */
1953                   0);
1954     OUT_BCS_BATCH(batch,
1955                   last_mb_in_slice << 31 |
1956                   first_mb_in_slice << 30 |
1957                   0 << 27 |     /* EnableCoeffClamp */
1958                   last_mb_in_slice_group << 26 |
1959                   0 << 25 |     /* MbSkipConvDisable */
1960                   first_mb_in_slice_group << 24 |
1961                   0 << 16 |     /* MvFieldSelect */
1962                   qp_scale_code << 0 |
1963                   0);
1964
1965     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
1966     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
1967     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1968     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1969
1970     ADVANCE_BCS_BATCH(batch);
1971
1972     return len_in_dwords;
1973 }
1974
1975 static void
1976 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1977                                            struct encode_state *encode_state,
1978                                            struct intel_encoder_context *encoder_context,
1979                                            struct intel_batchbuffer *slice_batch)
1980 {
1981     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1982     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1983
1984     if (encode_state->packed_header_data[idx]) {
1985         VAEncPackedHeaderParameterBuffer *param = NULL;
1986         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1987         unsigned int length_in_bits;
1988
1989         assert(encode_state->packed_header_param[idx]);
1990         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1991         length_in_bits = param->bit_length;
1992
1993         mfc_context->insert_object(ctx,
1994                                    encoder_context,
1995                                    header_data,
1996                                    ALIGN(length_in_bits, 32) >> 5,
1997                                    length_in_bits & 0x1f,
1998                                    5,   /* FIXME: check it */
1999                                    0,
2000                                    0,
2001                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2002                                    slice_batch);
2003     }
2004
2005     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2006
2007     if (encode_state->packed_header_data[idx]) {
2008         VAEncPackedHeaderParameterBuffer *param = NULL;
2009         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2010         unsigned int length_in_bits;
2011
2012         assert(encode_state->packed_header_param[idx]);
2013         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2014         length_in_bits = param->bit_length;
2015
2016         mfc_context->insert_object(ctx,
2017                                    encoder_context,
2018                                    header_data,
2019                                    ALIGN(length_in_bits, 32) >> 5,
2020                                    length_in_bits & 0x1f,
2021                                    5,   /* FIXME: check it */
2022                                    0,
2023                                    0,
2024                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2025                                    slice_batch);
2026     }
2027 }
2028
2029 static void 
2030 gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2031                                     struct encode_state *encode_state,
2032                                     struct intel_encoder_context *encoder_context,
2033                                     int slice_index,
2034                                     VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2035                                     struct intel_batchbuffer *slice_batch)
2036 {
2037     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2038     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2039     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2040     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2041     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2042     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2043     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2044     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2045     int i, j;
2046     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2047     unsigned int *msg = NULL;
2048     unsigned char *msg_ptr = NULL;
2049
2050     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2051     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2052     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2053     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2054
2055     dri_bo_map(vme_context->vme_output.bo , 0);
2056     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2057
2058     if (next_slice_group_param) {
2059         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2060         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2061     } else {
2062         h_next_start_pos = 0;
2063         v_next_start_pos = height_in_mbs;
2064     }
2065
2066     gen8_mfc_mpeg2_slicegroup_state(ctx,
2067                                     encoder_context,
2068                                     h_start_pos,
2069                                     v_start_pos,
2070                                     h_next_start_pos,
2071                                     v_next_start_pos,
2072                                     slice_index == 0,
2073                                     next_slice_group_param == NULL,
2074                                     slice_param->is_intra_slice,
2075                                     slice_param->quantiser_scale_code,
2076                                     slice_batch);
2077
2078     if (slice_index == 0) 
2079         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2080
2081     /* Insert '00' to make sure the header is valid */
2082     mfc_context->insert_object(ctx,
2083                                encoder_context,
2084                                (unsigned int*)section_delimiter,
2085                                1,
2086                                8,   /* 8bits in the last DWORD */
2087                                1,   /* 1 byte */
2088                                1,
2089                                0,
2090                                0,
2091                                slice_batch);
2092
2093     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2094         /* PAK for each macroblocks */
2095         for (j = 0; j < slice_param->num_macroblocks; j++) {
2096             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2097             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2098             int first_mb_in_slice = (j == 0);
2099             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2100             int first_mb_in_slice_group = (i == 0 && j == 0);
2101             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2102                                           j == slice_param->num_macroblocks - 1);
2103
2104             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2105
2106             if (slice_param->is_intra_slice) {
2107                 gen8_mfc_mpeg2_pak_object_intra(ctx,
2108                                                 encoder_context,
2109                                                 h_pos, v_pos,
2110                                                 first_mb_in_slice,
2111                                                 last_mb_in_slice,
2112                                                 first_mb_in_slice_group,
2113                                                 last_mb_in_slice_group,
2114                                                 0x1a,
2115                                                 slice_param->quantiser_scale_code,
2116                                                 0x3f,
2117                                                 0,
2118                                                 0xff,
2119                                                 slice_batch);
2120             } else {
2121                 int inter_rdo, intra_rdo;
2122                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2123                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2124
2125                 if (intra_rdo < inter_rdo) 
2126                     gen8_mfc_mpeg2_pak_object_intra(ctx,
2127                                                      encoder_context,
2128                                                      h_pos, v_pos,
2129                                                      first_mb_in_slice,
2130                                                      last_mb_in_slice,
2131                                                      first_mb_in_slice_group,
2132                                                      last_mb_in_slice_group,
2133                                                      0x1a,
2134                                                      slice_param->quantiser_scale_code,
2135                                                      0x3f,
2136                                                      0,
2137                                                      0xff,
2138                                                      slice_batch);
2139                 else
2140                     gen8_mfc_mpeg2_pak_object_inter(ctx,
2141                                                 encode_state,
2142                                                 encoder_context,
2143                                                 msg,
2144                                                 width_in_mbs, height_in_mbs,
2145                                                 h_pos, v_pos,
2146                                                 first_mb_in_slice,
2147                                                 last_mb_in_slice,
2148                                                 first_mb_in_slice_group,
2149                                                 last_mb_in_slice_group,
2150                                                 slice_param->quantiser_scale_code,
2151                                                 0,
2152                                                 0xff,
2153                                                 slice_batch);
2154             }
2155         }
2156
2157         slice_param++;
2158     }
2159
2160     dri_bo_unmap(vme_context->vme_output.bo);
2161
2162     /* tail data */
2163     if (next_slice_group_param == NULL) { /* end of a picture */
2164         mfc_context->insert_object(ctx,
2165                                    encoder_context,
2166                                    (unsigned int *)tail_delimiter,
2167                                    2,
2168                                    8,   /* 8bits in the last DWORD */
2169                                    5,   /* 5 bytes */
2170                                    1,
2171                                    1,
2172                                    0,
2173                                    slice_batch);
2174     } else {        /* end of a lsice group */
2175         mfc_context->insert_object(ctx,
2176                                    encoder_context,
2177                                    (unsigned int *)section_delimiter,
2178                                    1,
2179                                    8,   /* 8bits in the last DWORD */
2180                                    1,   /* 1 byte */
2181                                    1,
2182                                    1,
2183                                    0,
2184                                    slice_batch);
2185     }
2186 }
2187
2188 /* 
2189  * A batch buffer for all slices, including slice state, 
2190  * slice insert object and slice pak object commands
2191  *
2192  */
2193 static dri_bo *
2194 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2195                                           struct encode_state *encode_state,
2196                                           struct intel_encoder_context *encoder_context)
2197 {
2198     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2199     struct i965_driver_data *i965 = i965_driver_data(ctx);
2200     struct intel_batchbuffer *batch;
2201     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2202     dri_bo *batch_bo;
2203     int i;
2204
2205     batch = mfc_context->aux_batchbuffer;
2206     batch_bo = batch->buffer;
2207
2208     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2209         if (i == encode_state->num_slice_params_ext - 1)
2210             next_slice_group_param = NULL;
2211         else
2212             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2213
2214         gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2215     }
2216
2217     intel_batchbuffer_align(batch, 8);
2218     
2219     BEGIN_BCS_BATCH(batch, 2);
2220     OUT_BCS_BATCH(batch, 0);
2221     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2222     ADVANCE_BCS_BATCH(batch);
2223
2224     dri_bo_reference(batch_bo);
2225     intel_batchbuffer_free(batch);
2226     mfc_context->aux_batchbuffer = NULL;
2227
2228     return batch_bo;
2229 }
2230
2231 static void
2232 gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2233                                            struct encode_state *encode_state,
2234                                            struct intel_encoder_context *encoder_context)
2235 {
2236     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2237
2238     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2239     mfc_context->set_surface_state(ctx, encoder_context);
2240     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2241     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
2242     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2243     gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2244     gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
2245     gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
2246 }
2247
2248 static void
2249 gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2250                                    struct encode_state *encode_state,
2251                                    struct intel_encoder_context *encoder_context)
2252 {
2253     struct intel_batchbuffer *batch = encoder_context->base.batch;
2254     dri_bo *slice_batch_bo;
2255
2256     slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2257
2258     // begin programing
2259     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2260     intel_batchbuffer_emit_mi_flush(batch);
2261     
2262     // picture level programing
2263     gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2264
2265     BEGIN_BCS_BATCH(batch, 4);
2266     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2267     OUT_BCS_RELOC(batch,
2268                   slice_batch_bo,
2269                   I915_GEM_DOMAIN_COMMAND, 0, 
2270                   0);
2271     OUT_BCS_BATCH(batch, 0);
2272     OUT_BCS_BATCH(batch, 0);
2273     ADVANCE_BCS_BATCH(batch);
2274
2275     // end programing
2276     intel_batchbuffer_end_atomic(batch);
2277
2278     dri_bo_unreference(slice_batch_bo);
2279 }
2280
2281 static VAStatus
2282 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2283                         struct encode_state *encode_state,
2284                         struct intel_encoder_context *encoder_context)
2285 {
2286     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2287     struct object_surface *obj_surface; 
2288     struct object_buffer *obj_buffer;
2289     struct i965_coded_buffer_segment *coded_buffer_segment;
2290     VAStatus vaStatus = VA_STATUS_SUCCESS;
2291     dri_bo *bo;
2292     int i;
2293
2294     /* reconstructed surface */
2295     obj_surface = encode_state->reconstructed_object;
2296     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2297     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2298     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2299     mfc_context->surface_state.width = obj_surface->orig_width;
2300     mfc_context->surface_state.height = obj_surface->orig_height;
2301     mfc_context->surface_state.w_pitch = obj_surface->width;
2302     mfc_context->surface_state.h_pitch = obj_surface->height;
2303
2304     /* forward reference */
2305     obj_surface = encode_state->reference_objects[0];
2306
2307     if (obj_surface && obj_surface->bo) {
2308         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2309         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2310     } else
2311         mfc_context->reference_surfaces[0].bo = NULL;
2312
2313     /* backward reference */
2314     obj_surface = encode_state->reference_objects[1];
2315
2316     if (obj_surface && obj_surface->bo) {
2317         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2318         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2319     } else {
2320         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2321
2322         if (mfc_context->reference_surfaces[1].bo)
2323             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2324     }
2325
2326     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2327         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2328
2329         if (mfc_context->reference_surfaces[i].bo)
2330             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2331     }
2332     
2333     /* input YUV surface */
2334     obj_surface = encode_state->input_yuv_object;
2335     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2336     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2337
2338     /* coded buffer */
2339     obj_buffer = encode_state->coded_buf_object;
2340     bo = obj_buffer->buffer_store->bo;
2341     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2342     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2343     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2344     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2345
2346     /* set the internal flag to 0 to indicate the coded size is unknown */
2347     dri_bo_map(bo, 1);
2348     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2349     coded_buffer_segment->mapped = 0;
2350     coded_buffer_segment->codec = encoder_context->codec;
2351     dri_bo_unmap(bo);
2352
2353     return vaStatus;
2354 }
2355
2356 static VAStatus
2357 gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2358                               struct encode_state *encode_state,
2359                               struct intel_encoder_context *encoder_context)
2360 {
2361     gen8_mfc_init(ctx, encode_state, encoder_context);
2362     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2363     /*Programing bcs pipeline*/
2364     gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2365     gen8_mfc_run(ctx, encode_state, encoder_context);
2366
2367     return VA_STATUS_SUCCESS;
2368 }
2369
2370 static void
2371 gen8_mfc_context_destroy(void *context)
2372 {
2373     struct gen6_mfc_context *mfc_context = context;
2374     int i;
2375
2376     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2377     mfc_context->post_deblocking_output.bo = NULL;
2378
2379     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2380     mfc_context->pre_deblocking_output.bo = NULL;
2381
2382     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2383     mfc_context->uncompressed_picture_source.bo = NULL;
2384
2385     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2386     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2387
2388     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2389         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2390         mfc_context->direct_mv_buffers[i].bo = NULL;
2391     }
2392
2393     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2394     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2395
2396     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2397     mfc_context->macroblock_status_buffer.bo = NULL;
2398
2399     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2400     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2401
2402     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2403     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2404
2405
2406     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2407         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2408         mfc_context->reference_surfaces[i].bo = NULL;  
2409     }
2410
2411     i965_gpe_context_destroy(&mfc_context->gpe_context);
2412
2413     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2414     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2415
2416     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2417     mfc_context->aux_batchbuffer_surface.bo = NULL;
2418
2419     if (mfc_context->aux_batchbuffer)
2420         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2421
2422     mfc_context->aux_batchbuffer = NULL;
2423
2424     free(mfc_context);
2425 }
2426
2427 static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
2428                                   VAProfile profile,
2429                                   struct encode_state *encode_state,
2430                                   struct intel_encoder_context *encoder_context)
2431 {
2432     VAStatus vaStatus;
2433
2434     switch (profile) {
2435     case VAProfileH264ConstrainedBaseline:
2436     case VAProfileH264Main:
2437     case VAProfileH264High:
2438         vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2439         break;
2440
2441         /* FIXME: add for other profile */
2442     case VAProfileMPEG2Simple:
2443     case VAProfileMPEG2Main:
2444         vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2445         break;
2446
2447     default:
2448         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2449         break;
2450     }
2451
2452     return vaStatus;
2453 }
2454
2455 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2456 {
2457     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2458
2459     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2460
2461     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2462     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2463
2464     mfc_context->gpe_context.curbe.length = 32 * 4;
2465
2466     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2467     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2468     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2469     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2470     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2471
2472     i965_gpe_load_kernels(ctx,
2473                           &mfc_context->gpe_context,
2474                           gen8_mfc_kernels,
2475                           NUM_MFC_KERNEL);
2476
2477     mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
2478     mfc_context->set_surface_state = gen8_mfc_surface_state;
2479     mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
2480     mfc_context->avc_img_state = gen8_mfc_avc_img_state;
2481     mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
2482     mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
2483     mfc_context->insert_object = gen8_mfc_avc_insert_object;
2484     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2485
2486     encoder_context->mfc_context = mfc_context;
2487     encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
2488     encoder_context->mfc_pipeline = gen8_mfc_pipeline;
2489     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2490
2491     return True;
2492 }