Add the common BRC API to avoid the duplicated code
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44
45 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
46 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
47 };
48
49 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
50 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
51 };
52
53 static struct i965_kernel gen75_mfc_kernels[] = {
54     {
55         "MFC AVC INTRA BATCHBUFFER ",
56         MFC_BATCHBUFFER_AVC_INTRA,
57         gen75_mfc_batchbuffer_avc_intra,
58         sizeof(gen75_mfc_batchbuffer_avc_intra),
59         NULL
60     },
61
62     {
63         "MFC AVC INTER BATCHBUFFER ",
64         MFC_BATCHBUFFER_AVC_INTER,
65         gen75_mfc_batchbuffer_avc_inter,
66         sizeof(gen75_mfc_batchbuffer_avc_inter),
67         NULL
68     },
69 };
70
71 static void
72 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
73                           int standard_select,
74                           struct intel_encoder_context *encoder_context)
75 {
76     struct intel_batchbuffer *batch = encoder_context->base.batch;
77     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
78
79     assert(standard_select == MFX_FORMAT_MPEG2 ||
80            standard_select == MFX_FORMAT_AVC);
81
82     BEGIN_BCS_BATCH(batch, 5);
83
84     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
85     OUT_BCS_BATCH(batch,
86                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
87                   (MFD_MODE_VLD << 15) | /* VLD mode */
88                   (1 << 10) | /* Stream-Out Enable */
89                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
90                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
91                   (0 << 8)  | /* Pre Deblocking Output */
92                   (0 << 5)  | /* not in stitch mode */
93                   (1 << 4)  | /* encoding mode */
94                   (standard_select << 0));  /* standard select: avc or mpeg2 */
95     OUT_BCS_BATCH(batch,
96                   (0 << 7)  | /* expand NOA bus flag */
97                   (0 << 6)  | /* disable slice-level clock gating */
98                   (0 << 5)  | /* disable clock gating for NOA */
99                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
100                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
101                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
102                   (0 << 1)  |
103                   (0 << 0));
104     OUT_BCS_BATCH(batch, 0);
105     OUT_BCS_BATCH(batch, 0);
106
107     ADVANCE_BCS_BATCH(batch);
108 }
109
110 static void
111 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
112 {
113     struct intel_batchbuffer *batch = encoder_context->base.batch;
114     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
115
116     BEGIN_BCS_BATCH(batch, 6);
117
118     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch,
121                   ((mfc_context->surface_state.height - 1) << 18) |
122                   ((mfc_context->surface_state.width - 1) << 4));
123     OUT_BCS_BATCH(batch,
124                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126                   (0 << 22) | /* surface object control state, FIXME??? */
127                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128                   (0 << 2)  | /* must be 0 for interleave U/V */
129                   (1 << 1)  | /* must be tiled */
130                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
131     OUT_BCS_BATCH(batch,
132                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
133                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
134     OUT_BCS_BATCH(batch, 0);
135
136     ADVANCE_BCS_BATCH(batch);
137 }
138
139 static void
140 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
141 {
142     struct intel_batchbuffer *batch = encoder_context->base.batch;
143     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
144     struct gen6_vme_context *vme_context = encoder_context->vme_context;
145
146     BEGIN_BCS_BATCH(batch, 11);
147
148     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
149     OUT_BCS_BATCH(batch, 0);
150     OUT_BCS_BATCH(batch, 0);
151     /* MFX Indirect MV Object Base Address */
152     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
153     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
154     OUT_BCS_BATCH(batch, 0);
155     OUT_BCS_BATCH(batch, 0);
156     OUT_BCS_BATCH(batch, 0);
157     OUT_BCS_BATCH(batch, 0);
158     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
159     OUT_BCS_RELOC(batch,
160                   mfc_context->mfc_indirect_pak_bse_object.bo,
161                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
162                   0);
163     OUT_BCS_RELOC(batch,
164                   mfc_context->mfc_indirect_pak_bse_object.bo,
165                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
166                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
167
168     ADVANCE_BCS_BATCH(batch);
169 }
170
171 static void
172 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
173                        struct intel_encoder_context *encoder_context)
174 {
175     struct intel_batchbuffer *batch = encoder_context->base.batch;
176     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
177     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
178
179     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
180     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
181
182     BEGIN_BCS_BATCH(batch, 16);
183
184     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
185     OUT_BCS_BATCH(batch,
186                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
187     OUT_BCS_BATCH(batch, 
188                   ((height_in_mbs - 1) << 16) | 
189                   ((width_in_mbs - 1) << 0));
190     OUT_BCS_BATCH(batch, 
191                   (0 << 24) |   /* Second Chroma QP Offset */
192                   (0 << 16) |   /* Chroma QP Offset */
193                   (0 << 14) |   /* Max-bit conformance Intra flag */
194                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
195                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
196                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
197                   (0 << 8)  |   /* FIXME: Image Structure */
198                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
199     OUT_BCS_BATCH(batch,
200                   (0 << 16) |   /* Mininum Frame size */
201                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
202                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
203                   (0 << 13) |   /* CABAC 0 word insertion test enable */
204                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
205                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
206                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
207                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
208                   (0 << 6)  |   /* Only valid for VLD decoding mode */
209                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
210                   (0 << 4)  |   /* Direct 8x8 inference flag */
211                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
212                   (1 << 2)  |   /* Frame MB only flag */
213                   (0 << 1)  |   /* MBAFF mode is in active */
214                   (0 << 0));    /* Field picture flag */
215     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
216     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
217                   (0xBB8 << 16) |       /* InterMbMaxSz */
218                   (0xEE8) );            /* IntraMbMaxSz */
219     OUT_BCS_BATCH(batch, 0);            /* Reserved */
220     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
221     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
222     OUT_BCS_BATCH(batch, 0x8C000000);
223     OUT_BCS_BATCH(batch, 0x00010000);
224     OUT_BCS_BATCH(batch, 0);
225     OUT_BCS_BATCH(batch, 0);
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228
229     ADVANCE_BCS_BATCH(batch);
230 }
231
232 static void
233 gen75_mfc_qm_state(VADriverContextP ctx,
234                   int qm_type,
235                   unsigned int *qm,
236                   int qm_length,
237                   struct intel_encoder_context *encoder_context)
238 {
239     struct intel_batchbuffer *batch = encoder_context->base.batch;
240     unsigned int qm_buffer[16];
241
242     assert(qm_length <= 16);
243     assert(sizeof(*qm) == 4);
244     memcpy(qm_buffer, qm, qm_length * 4);
245
246     BEGIN_BCS_BATCH(batch, 18);
247     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
248     OUT_BCS_BATCH(batch, qm_type << 0);
249     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
250     ADVANCE_BCS_BATCH(batch);
251 }
252
253 static void
254 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
255 {
256     unsigned int qm[16] = {
257         0x10101010, 0x10101010, 0x10101010, 0x10101010,
258         0x10101010, 0x10101010, 0x10101010, 0x10101010,
259         0x10101010, 0x10101010, 0x10101010, 0x10101010,
260         0x10101010, 0x10101010, 0x10101010, 0x10101010
261     };
262
263     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
264     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
265     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
266     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
267 }
268
269 static void
270 gen75_mfc_fqm_state(VADriverContextP ctx,
271                    int fqm_type,
272                    unsigned int *fqm,
273                    int fqm_length,
274                    struct intel_encoder_context *encoder_context)
275 {
276     struct intel_batchbuffer *batch = encoder_context->base.batch;
277     unsigned int fqm_buffer[32];
278
279     assert(fqm_length <= 32);
280     assert(sizeof(*fqm) == 4);
281     memcpy(fqm_buffer, fqm, fqm_length * 4);
282
283     BEGIN_BCS_BATCH(batch, 34);
284     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
285     OUT_BCS_BATCH(batch, fqm_type << 0);
286     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
287     ADVANCE_BCS_BATCH(batch);
288 }
289
290 static void
291 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
292 {
293     unsigned int qm[32] = {
294         0x10001000, 0x10001000, 0x10001000, 0x10001000,
295         0x10001000, 0x10001000, 0x10001000, 0x10001000,
296         0x10001000, 0x10001000, 0x10001000, 0x10001000,
297         0x10001000, 0x10001000, 0x10001000, 0x10001000,
298         0x10001000, 0x10001000, 0x10001000, 0x10001000,
299         0x10001000, 0x10001000, 0x10001000, 0x10001000,
300         0x10001000, 0x10001000, 0x10001000, 0x10001000,
301         0x10001000, 0x10001000, 0x10001000, 0x10001000
302     };
303
304     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
305     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
306     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
307     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
308 }
309
310 static void
311 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
312                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
313                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
314                            struct intel_batchbuffer *batch)
315 {
316     if (batch == NULL)
317         batch = encoder_context->base.batch;
318
319     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
320
321     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
322     OUT_BCS_BATCH(batch,
323                   (0 << 16) |   /* always start at offset 0 */
324                   (data_bits_in_last_dw << 8) |
325                   (skip_emul_byte_count << 4) |
326                   (!!emulation_flag << 3) |
327                   ((!!is_last_header) << 2) |
328                   ((!!is_end_of_slice) << 1) |
329                   (0 << 0));    /* FIXME: ??? */
330     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
331
332     ADVANCE_BCS_BATCH(batch);
333 }
334
335
336
337 static void 
338 gen75_mfc_free_avc_surface(void **data)
339 {
340     struct gen6_mfc_avc_surface_aux *avc_surface = *data;
341
342     if (!avc_surface)
343         return;
344
345     dri_bo_unreference(avc_surface->dmv_top);
346     avc_surface->dmv_top = NULL;
347     dri_bo_unreference(avc_surface->dmv_bottom);
348     avc_surface->dmv_bottom = NULL;
349
350     free(avc_surface);
351     *data = NULL;
352 }
353
354 static void gen75_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
355 {
356     struct i965_driver_data *i965 = i965_driver_data(ctx);
357     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
358     dri_bo *bo;
359     int i;
360
361     /*Encode common setup for MFC*/
362     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
363     mfc_context->post_deblocking_output.bo = NULL;
364
365     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
366     mfc_context->pre_deblocking_output.bo = NULL;
367
368     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
369     mfc_context->uncompressed_picture_source.bo = NULL;
370
371     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
372     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
373
374     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
375         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
376         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
377         mfc_context->direct_mv_buffers[i].bo = NULL;
378     }
379
380     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
381         if (mfc_context->reference_surfaces[i].bo != NULL)
382             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
383         mfc_context->reference_surfaces[i].bo = NULL;  
384     }
385
386     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
387     bo = dri_bo_alloc(i965->intel.bufmgr,
388                       "Buffer",
389                       128 * 64,
390                       64);
391     assert(bo);
392     mfc_context->intra_row_store_scratch_buffer.bo = bo;
393
394     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
395     bo = dri_bo_alloc(i965->intel.bufmgr,
396                       "Buffer",
397                       128*128*16,
398                       64);
399     assert(bo);
400     mfc_context->macroblock_status_buffer.bo = bo;
401
402     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
403     bo = dri_bo_alloc(i965->intel.bufmgr,
404                       "Buffer",
405                       49152,  /* 6 * 128 * 64 */
406                       64);
407     assert(bo);
408     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
409
410     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
411     bo = dri_bo_alloc(i965->intel.bufmgr,
412                       "Buffer",
413                       12288, /* 1.5 * 128 * 64 */
414                       0x1000);
415     assert(bo);
416     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
417
418     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
419     mfc_context->mfc_batchbuffer_surface.bo = NULL;
420
421     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
422     mfc_context->aux_batchbuffer_surface.bo = NULL;
423
424     if (mfc_context->aux_batchbuffer)
425         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
426
427     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
428     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
429     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
430     mfc_context->aux_batchbuffer_surface.pitch = 16;
431     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
432     mfc_context->aux_batchbuffer_surface.size_block = 16;
433
434     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
435 }
436
437 static void
438 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
439 {
440     struct intel_batchbuffer *batch = encoder_context->base.batch;
441     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
442     int i;
443
444     BEGIN_BCS_BATCH(batch, 24);
445
446     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
447
448     if (mfc_context->pre_deblocking_output.bo)
449         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
450                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
451                       0);
452     else
453         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
454
455     if (mfc_context->post_deblocking_output.bo)
456         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
457                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
458                       0);                                                                                       /* post output addr  */ 
459     else
460         OUT_BCS_BATCH(batch, 0);
461
462     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
463                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
464                   0);                                                                                   /* uncompressed data */
465     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
466                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
467                   0);                                                                                   /* StreamOut data*/
468     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
469                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
470                   0);   
471     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
472                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
473                   0);
474     /* 7..22 Reference pictures*/
475     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
476         if ( mfc_context->reference_surfaces[i].bo != NULL) {
477             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
478                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
479                           0);                   
480         } else {
481             OUT_BCS_BATCH(batch, 0);
482         }
483     }
484     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
485                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
486                   0);                                                                                   /* Macroblock status buffer*/
487
488     ADVANCE_BCS_BATCH(batch);
489 }
490
491 static void
492 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
493 {
494     struct intel_batchbuffer *batch = encoder_context->base.batch;
495     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
496
497     int i;
498
499     BEGIN_BCS_BATCH(batch, 69);
500
501     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
502
503     /* Reference frames and Current frames */
504     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
505         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
506             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
507                           I915_GEM_DOMAIN_INSTRUCTION, 0,
508                           0);
509         } else {
510             OUT_BCS_BATCH(batch, 0);
511         }
512     }
513
514     /* POL list */
515     for(i = 0; i < 32; i++) {
516         OUT_BCS_BATCH(batch, i/2);
517     }
518     OUT_BCS_BATCH(batch, 0);
519     OUT_BCS_BATCH(batch, 0);
520
521     ADVANCE_BCS_BATCH(batch);
522 }
523
524 static void
525 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
526 {
527     struct intel_batchbuffer *batch = encoder_context->base.batch;
528     int i;
529
530     BEGIN_BCS_BATCH(batch, 10);
531     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
532     OUT_BCS_BATCH(batch, 0);                  //Select L0
533     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
534     for(i = 0; i < 7; i++) {
535         OUT_BCS_BATCH(batch, 0x80808080);
536     }   
537     ADVANCE_BCS_BATCH(batch);
538
539     BEGIN_BCS_BATCH(batch, 10);
540     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
541     OUT_BCS_BATCH(batch, 1);                  //Select L1
542     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
543     for(i = 0; i < 7; i++) {
544         OUT_BCS_BATCH(batch, 0x80808080);
545     }   
546     ADVANCE_BCS_BATCH(batch);
547 }
548
549 static void
550 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
551 {
552     struct intel_batchbuffer *batch = encoder_context->base.batch;
553     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
554
555     BEGIN_BCS_BATCH(batch, 4);
556
557     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
558     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
559                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
560                   0);
561     OUT_BCS_BATCH(batch, 0);
562     OUT_BCS_BATCH(batch, 0);
563
564     ADVANCE_BCS_BATCH(batch);
565 }
566
567
568 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
569                                       struct encode_state *encode_state,
570                                       struct intel_encoder_context *encoder_context)
571 {
572     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
573
574     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
575     mfc_context->set_surface_state(ctx, encoder_context);
576     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
577     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
578     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
579     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
580     mfc_context->avc_qm_state(ctx, encoder_context);
581     mfc_context->avc_fqm_state(ctx, encoder_context);
582     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
583     gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
584 }
585
586
587 static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx, 
588                                      struct encode_state *encode_state,
589                                      struct intel_encoder_context *encoder_context)
590 {
591     struct i965_driver_data *i965 = i965_driver_data(ctx);
592     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
593     struct object_surface *obj_surface; 
594     struct object_buffer *obj_buffer;
595     struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
596     dri_bo *bo;
597     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
598     VAStatus vaStatus = VA_STATUS_SUCCESS;
599     int i, j, enable_avc_ildb = 0;
600     VAEncSliceParameterBufferH264 *slice_param;
601     VACodedBufferSegment *coded_buffer_segment;
602     unsigned char *flag = NULL;
603
604     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
605         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
606         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
607
608         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
609             assert((slice_param->slice_type == SLICE_TYPE_I) ||
610                    (slice_param->slice_type == SLICE_TYPE_SI) ||
611                    (slice_param->slice_type == SLICE_TYPE_P) ||
612                    (slice_param->slice_type == SLICE_TYPE_SP) ||
613                    (slice_param->slice_type == SLICE_TYPE_B));
614
615             if (slice_param->disable_deblocking_filter_idc != 1) {
616                 enable_avc_ildb = 1;
617                 break;
618             }
619
620             slice_param++;
621         }
622     }
623
624     /*Setup all the input&output object*/
625
626     /* Setup current frame and current direct mv buffer*/
627     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
628     assert(obj_surface);
629     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
630
631     if ( obj_surface->private_data == NULL) {
632         gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
633         gen6_avc_surface->dmv_top = 
634             dri_bo_alloc(i965->intel.bufmgr,
635                          "Buffer",
636                          68*8192, 
637                          64);
638         gen6_avc_surface->dmv_bottom = 
639             dri_bo_alloc(i965->intel.bufmgr,
640                          "Buffer",
641                          68*8192, 
642                          64);
643         assert(gen6_avc_surface->dmv_top);
644         assert(gen6_avc_surface->dmv_bottom);
645         obj_surface->private_data = (void *)gen6_avc_surface;
646         obj_surface->free_private_data = (void *)gen75_mfc_free_avc_surface; 
647     }
648     gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
649     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
650     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
651     dri_bo_reference(gen6_avc_surface->dmv_top);
652     dri_bo_reference(gen6_avc_surface->dmv_bottom);
653
654     if (enable_avc_ildb) {
655         mfc_context->post_deblocking_output.bo = obj_surface->bo;
656         dri_bo_reference(mfc_context->post_deblocking_output.bo);
657     } else {
658         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
659         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
660     }
661
662     mfc_context->surface_state.width = obj_surface->orig_width;
663     mfc_context->surface_state.height = obj_surface->orig_height;
664     mfc_context->surface_state.w_pitch = obj_surface->width;
665     mfc_context->surface_state.h_pitch = obj_surface->height;
666     
667     /* Setup reference frames and direct mv buffers*/
668     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
669         if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
670             obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
671             assert(obj_surface);
672             if (obj_surface->bo != NULL) {
673                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
674                 dri_bo_reference(obj_surface->bo);
675             }
676             /* Check DMV buffer */
677             if ( obj_surface->private_data == NULL) {
678                 
679                 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
680                 gen6_avc_surface->dmv_top = 
681                     dri_bo_alloc(i965->intel.bufmgr,
682                                  "Buffer",
683                                  68*8192, 
684                                  64);
685                 gen6_avc_surface->dmv_bottom = 
686                     dri_bo_alloc(i965->intel.bufmgr,
687                                  "Buffer",
688                                  68*8192, 
689                                  64);
690                 assert(gen6_avc_surface->dmv_top);
691                 assert(gen6_avc_surface->dmv_bottom);
692                 obj_surface->private_data = gen6_avc_surface;
693                 obj_surface->free_private_data = gen75_mfc_free_avc_surface; 
694             }
695     
696             gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
697             /* Setup DMV buffer */
698             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
699             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
700             dri_bo_reference(gen6_avc_surface->dmv_top);
701             dri_bo_reference(gen6_avc_surface->dmv_bottom);
702         } else {
703             break;
704         }
705     }
706         
707     obj_surface = SURFACE(encoder_context->input_yuv_surface);
708     assert(obj_surface && obj_surface->bo);
709     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
710     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
711
712     obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
713     bo = obj_buffer->buffer_store->bo;
714     assert(bo);
715     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
716     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_SIZE;
717     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
718     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
719     
720     dri_bo_map(bo, 1);
721     coded_buffer_segment = (VACodedBufferSegment *)bo->virtual;
722     flag = (unsigned char *)(coded_buffer_segment + 1);
723     *flag = 0;
724     dri_bo_unmap(bo);
725
726     return vaStatus;
727 }
728
729
730 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
731                              struct encode_state *encode_state,
732                              struct intel_encoder_context *encoder_context)
733 {
734     struct intel_batchbuffer *batch = encoder_context->base.batch;
735
736     intel_batchbuffer_flush(batch);             //run the pipeline
737
738     return VA_STATUS_SUCCESS;
739 }
740
741
742 static VAStatus
743 gen75_mfc_stop(VADriverContextP ctx, 
744               struct encode_state *encode_state,
745               struct intel_encoder_context *encoder_context,
746               int *encoded_bits_size)
747 {
748     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
749     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
750     VACodedBufferSegment *coded_buffer_segment;
751     
752     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
753     assert(vaStatus == VA_STATUS_SUCCESS);
754     *encoded_bits_size = coded_buffer_segment->size * 8;
755     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
756
757     return VA_STATUS_SUCCESS;
758 }
759
760
761 static void
762 gen75_mfc_avc_slice_state(VADriverContextP ctx,
763                          VAEncPictureParameterBufferH264 *pic_param,
764                          VAEncSliceParameterBufferH264 *slice_param,
765                          struct encode_state *encode_state,
766                          struct intel_encoder_context *encoder_context,
767                          int rate_control_enable,
768                          int qp,
769                          struct intel_batchbuffer *batch)
770 {
771     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
772     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
773     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
774     int beginmb = slice_param->macroblock_address;
775     int endmb = beginmb + slice_param->num_macroblocks;
776     int beginx = beginmb % width_in_mbs;
777     int beginy = beginmb / width_in_mbs;
778     int nextx =  endmb % width_in_mbs;
779     int nexty = endmb / width_in_mbs;
780     int slice_type = slice_param->slice_type;
781     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
782     int bit_rate_control_target, maxQpN, maxQpP;
783     unsigned char correct[6], grow, shrink;
784     int i;
785     int weighted_pred_idc = 0;
786     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
787     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
788
789     if (batch == NULL)
790         batch = encoder_context->base.batch;
791
792     bit_rate_control_target = slice_type;
793     if (slice_type == SLICE_TYPE_SP)
794         bit_rate_control_target = SLICE_TYPE_P;
795     else if (slice_type == SLICE_TYPE_SI)
796         bit_rate_control_target = SLICE_TYPE_I;
797
798     if (slice_type == SLICE_TYPE_P) {
799         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
800     } else if (slice_type == SLICE_TYPE_B) {
801         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
802
803         if (weighted_pred_idc == 2) {
804             /* 8.4.3 - Derivation process for prediction weights (8-279) */
805             luma_log2_weight_denom = 5;
806             chroma_log2_weight_denom = 5;
807         }
808     }
809
810     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
811     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
812
813     for (i = 0; i < 6; i++)
814         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
815
816     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
817         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
818     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
819         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
820
821     BEGIN_BCS_BATCH(batch, 11);;
822
823     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
824     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
825
826     if (slice_type == SLICE_TYPE_I) {
827         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
828     } else {
829         OUT_BCS_BATCH(batch,
830                       (1 << 16) |                       /*1 reference frame*/
831                       (chroma_log2_weight_denom << 8) |
832                       (luma_log2_weight_denom << 0));
833     }
834
835     OUT_BCS_BATCH(batch, 
836                   (weighted_pred_idc << 30) |
837                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
838                   (slice_param->disable_deblocking_filter_idc << 27) |
839                   (slice_param->cabac_init_idc << 24) |
840                   (qp<<16) |                    /*Slice Quantization Parameter*/
841                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
842                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
843     OUT_BCS_BATCH(batch,
844                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
845                   (beginx << 16) |
846                   slice_param->macroblock_address );
847     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
848     OUT_BCS_BATCH(batch, 
849                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
850                   (1 << 30) |           /*ResetRateControlCounter*/
851                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
852                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
853                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
854                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
855                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
856                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
857                   (last_slice << 19) |     /*IsLastSlice*/
858                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
859                   (1 << 17) |       /*HeaderPresentFlag*/       
860                   (1 << 16) |       /*SliceData PresentFlag*/
861                   (1 << 15) |       /*TailPresentFlag*/
862                   (1 << 13) |       /*RBSP NAL TYPE*/   
863                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
864     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
865     OUT_BCS_BATCH(batch,
866                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
867                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
868                   (shrink << 8)  |
869                   (grow << 0));   
870     OUT_BCS_BATCH(batch,
871                   (correct[5] << 20) |
872                   (correct[4] << 16) |
873                   (correct[3] << 12) |
874                   (correct[2] << 8) |
875                   (correct[1] << 4) |
876                   (correct[0] << 0));
877     OUT_BCS_BATCH(batch, 0);
878
879     ADVANCE_BCS_BATCH(batch);
880 }
881
882
883 static void gen75_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
884                                                     struct encode_state *encode_state,
885                                                     struct intel_encoder_context *encoder_context,
886                                                     struct intel_batchbuffer *slice_batch)
887 {
888     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
889     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
890
891     if (encode_state->packed_header_data[idx]) {
892         VAEncPackedHeaderParameterBuffer *param = NULL;
893         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
894         unsigned int length_in_bits;
895
896         assert(encode_state->packed_header_param[idx]);
897         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
898         length_in_bits = param->bit_length;
899
900         mfc_context->insert_object(ctx,
901                                    encoder_context,
902                                    header_data,
903                                    ALIGN(length_in_bits, 32) >> 5,
904                                    length_in_bits & 0x1f,
905                                    5,   /* FIXME: check it */
906                                    0,
907                                    0,
908                                    !param->has_emulation_bytes,
909                                    slice_batch);
910     }
911
912     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
913
914     if (encode_state->packed_header_data[idx]) {
915         VAEncPackedHeaderParameterBuffer *param = NULL;
916         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
917         unsigned int length_in_bits;
918
919         assert(encode_state->packed_header_param[idx]);
920         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
921         length_in_bits = param->bit_length;
922
923         mfc_context->insert_object(ctx,
924                                    encoder_context,
925                                    header_data,
926                                    ALIGN(length_in_bits, 32) >> 5,
927                                    length_in_bits & 0x1f,
928                                    5, /* FIXME: check it */
929                                    0,
930                                    0,
931                                    !param->has_emulation_bytes,
932                                    slice_batch);
933     }
934     
935     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
936
937     if (encode_state->packed_header_data[idx]) {
938         VAEncPackedHeaderParameterBuffer *param = NULL;
939         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
940         unsigned int length_in_bits;
941
942         assert(encode_state->packed_header_param[idx]);
943         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
944         length_in_bits = param->bit_length;
945
946         mfc_context->insert_object(ctx,
947                                    encoder_context,
948                                    header_data,
949                                    ALIGN(length_in_bits, 32) >> 5,
950                                    length_in_bits & 0x1f,
951                                    5, /* FIXME: check it */
952                                    0,
953                                    0,
954                                    !param->has_emulation_bytes,
955                                    slice_batch);
956     }
957 }
958
959 #if __SOFTWARE__
960
961 static int
962 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
963                                 int qp,unsigned int *msg,
964                               struct intel_encoder_context *encoder_context,
965                               unsigned char target_mb_size, unsigned char max_mb_size,
966                               struct intel_batchbuffer *batch)
967 {
968     int len_in_dwords = 11;
969
970     if (batch == NULL)
971         batch = encoder_context->base.batch;
972
973     BEGIN_BCS_BATCH(batch, len_in_dwords);
974
975     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
976     OUT_BCS_BATCH(batch, 0);
977     OUT_BCS_BATCH(batch, 0);
978     OUT_BCS_BATCH(batch, 
979                   (0 << 24) |           /* PackedMvNum, Debug*/
980                   (0 << 20) |           /* No motion vector */
981                   (1 << 19) |           /* CbpDcY */
982                   (1 << 18) |           /* CbpDcU */
983                   (1 << 17) |           /* CbpDcV */
984                   (msg[0] & 0xFFFF) );
985
986     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
987     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
988     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
989
990     /*Stuff for Intra MB*/
991     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
992     OUT_BCS_BATCH(batch, msg[2]);       
993     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
994     
995     /*MaxSizeInWord and TargetSzieInWord*/
996     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
997                   (target_mb_size << 16) );
998
999     ADVANCE_BCS_BATCH(batch);
1000
1001     return len_in_dwords;
1002 }
1003
1004 static int
1005 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1006                               unsigned int *msg, unsigned int offset,
1007                               struct intel_encoder_context *encoder_context,
1008                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1009                               struct intel_batchbuffer *batch)
1010 {
1011     int len_in_dwords = 11;
1012
1013     if (batch == NULL)
1014         batch = encoder_context->base.batch;
1015
1016     BEGIN_BCS_BATCH(batch, len_in_dwords);
1017
1018     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1019
1020     OUT_BCS_BATCH(batch, msg[2]);         /* 32 MV*/
1021     OUT_BCS_BATCH(batch, offset);
1022
1023     OUT_BCS_BATCH(batch, msg[0]);
1024
1025     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1026     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1027 #if 0 
1028     if ( slice_type == SLICE_TYPE_B) {
1029         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1030     } else {
1031         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1032     }
1033 #else
1034     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1035 #endif
1036
1037
1038     /*Stuff for Inter MB*/
1039     OUT_BCS_BATCH(batch, msg[1]);        
1040     OUT_BCS_BATCH(batch, 0x0);    
1041     OUT_BCS_BATCH(batch, 0x0);        
1042
1043     /*MaxSizeInWord and TargetSzieInWord*/
1044     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1045                   (target_mb_size << 16) );
1046
1047     ADVANCE_BCS_BATCH(batch);
1048
1049     return len_in_dwords;
1050 }
1051
1052 static void 
1053 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1054                                        struct encode_state *encode_state,
1055                                        struct intel_encoder_context *encoder_context,
1056                                        int slice_index,
1057                                        struct intel_batchbuffer *slice_batch)
1058 {
1059     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1060     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1061     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1062     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1063     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1064     unsigned int *msg = NULL, offset = 0;
1065     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1066     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1067     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1068     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1069     int i,x,y;
1070     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1071     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1072     unsigned char *slice_header = NULL;
1073     int slice_header_length_in_bits = 0;
1074     unsigned int tail_data[] = { 0x0, 0x0 };
1075     int slice_type = pSliceParameter->slice_type;
1076
1077
1078     if (rate_control_mode == VA_RC_CBR) {
1079         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1080         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1081     }
1082
1083     /* only support for 8-bit pixel bit-depth */
1084     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1085     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1086     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1087     assert(qp >= 0 && qp < 52);
1088
1089     gen75_mfc_avc_slice_state(ctx, 
1090                              pPicParameter,
1091                              pSliceParameter,
1092                              encode_state, encoder_context,
1093                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1094
1095     if ( slice_index == 0) 
1096         gen75_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1097
1098     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1099
1100     // slice hander
1101     mfc_context->insert_object(ctx, encoder_context,
1102                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1103                                5,  /* first 5 bytes are start code + nal unit type */
1104                                1, 0, 1, slice_batch);
1105
1106     dri_bo_map(vme_context->vme_output.bo , 1);
1107     msg = (unsigned int *)vme_context->vme_output.bo->virtual;
1108
1109     if (is_intra) {
1110         msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
1111     } else {
1112         msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
1113         msg += 32; /* the first 32 DWs are MVs */
1114         offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
1115     }
1116    
1117     for (i = pSliceParameter->macroblock_address; 
1118          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1119         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1120         x = i % width_in_mbs;
1121         y = i / width_in_mbs;
1122
1123         if (is_intra) {
1124             assert(msg);
1125             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1126             msg += INTRA_VME_OUTPUT_IN_DWS;
1127         } else {
1128             if (msg[0] & INTRA_MB_FLAG_MASK) {
1129                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1130             } else {
1131                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1132             }
1133
1134             msg += INTER_VME_OUTPUT_IN_DWS;
1135             offset += INTER_VME_OUTPUT_IN_BYTES;
1136         }
1137     }
1138    
1139     dri_bo_unmap(vme_context->vme_output.bo);
1140
1141     if ( last_slice ) {    
1142         mfc_context->insert_object(ctx, encoder_context,
1143                                    tail_data, 2, 8,
1144                                    2, 1, 1, 0, slice_batch);
1145     } else {
1146         mfc_context->insert_object(ctx, encoder_context,
1147                                    tail_data, 1, 8,
1148                                    1, 1, 1, 0, slice_batch);
1149     }
1150
1151     free(slice_header);
1152
1153 }
1154
1155 static dri_bo *
1156 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1157                                   struct encode_state *encode_state,
1158                                   struct intel_encoder_context *encoder_context)
1159 {
1160     struct i965_driver_data *i965 = i965_driver_data(ctx);
1161     struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
1162     dri_bo *batch_bo = batch->buffer;
1163     int i;
1164
1165     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1166         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1167     }
1168
1169     intel_batchbuffer_align(batch, 8);
1170     
1171     BEGIN_BCS_BATCH(batch, 2);
1172     OUT_BCS_BATCH(batch, 0);
1173     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1174     ADVANCE_BCS_BATCH(batch);
1175
1176     dri_bo_reference(batch_bo);
1177     intel_batchbuffer_free(batch);
1178
1179     return batch_bo;
1180 }
1181
1182 #else
1183
1184 static void
1185 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1186                                     struct encode_state *encode_state,
1187                                     struct intel_encoder_context *encoder_context)
1188
1189 {
1190     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1191     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1192
1193     assert(vme_context->vme_output.bo);
1194     mfc_context->buffer_suface_setup(ctx,
1195                                      &mfc_context->gpe_context,
1196                                      &vme_context->vme_output,
1197                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1198                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1199     assert(mfc_context->aux_batchbuffer_surface.bo);
1200     mfc_context->buffer_suface_setup(ctx,
1201                                      &mfc_context->gpe_context,
1202                                      &mfc_context->aux_batchbuffer_surface,
1203                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1204                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1205 }
1206
1207 static void
1208 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1209                                      struct encode_state *encode_state,
1210                                      struct intel_encoder_context *encoder_context)
1211
1212 {
1213     struct i965_driver_data *i965 = i965_driver_data(ctx);
1214     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1215     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1216     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1217     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1218     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1219     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1220     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1221     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1222                                                            "MFC batchbuffer",
1223                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1224                                                            0x1000);
1225     mfc_context->buffer_suface_setup(ctx,
1226                                      &mfc_context->gpe_context,
1227                                      &mfc_context->mfc_batchbuffer_surface,
1228                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1229                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1230 }
1231
1232 static void
1233 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1234                                     struct encode_state *encode_state,
1235                                     struct intel_encoder_context *encoder_context)
1236 {
1237     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1238     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1239 }
1240
1241 static void
1242 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1243                                 struct encode_state *encode_state,
1244                                 struct intel_encoder_context *encoder_context)
1245 {
1246     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1247     struct gen6_interface_descriptor_data *desc;   
1248     int i;
1249     dri_bo *bo;
1250
1251     bo = mfc_context->gpe_context.idrt.bo;
1252     dri_bo_map(bo, 1);
1253     assert(bo->virtual);
1254     desc = bo->virtual;
1255
1256     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1257         struct i965_kernel *kernel;
1258
1259         kernel = &mfc_context->gpe_context.kernels[i];
1260         assert(sizeof(*desc) == 32);
1261
1262         /*Setup the descritor table*/
1263         memset(desc, 0, sizeof(*desc));
1264         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1265         desc->desc2.sampler_count = 0;
1266         desc->desc2.sampler_state_pointer = 0;
1267         desc->desc3.binding_table_entry_count = 2;
1268         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1269         desc->desc4.constant_urb_entry_read_offset = 0;
1270         desc->desc4.constant_urb_entry_read_length = 4;
1271                 
1272         /*kernel start*/
1273         dri_bo_emit_reloc(bo,   
1274                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1275                           0,
1276                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1277                           kernel->bo);
1278         desc++;
1279     }
1280
1281     dri_bo_unmap(bo);
1282 }
1283
1284 static void
1285 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1286                                     struct encode_state *encode_state,
1287                                     struct intel_encoder_context *encoder_context)
1288 {
1289     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1290     
1291     (void)mfc_context;
1292 }
1293
1294 static void
1295 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1296                                          int index,
1297                                          int head_offset,
1298                                          int batchbuffer_offset,
1299                                          int head_size,
1300                                          int tail_size,
1301                                          int number_mb_cmds,
1302                                          int first_object,
1303                                          int last_object,
1304                                          int last_slice,
1305                                          int mb_x,
1306                                          int mb_y,
1307                                          int width_in_mbs,
1308                                          int qp)
1309 {
1310     BEGIN_BATCH(batch, 12);
1311     
1312     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1313     OUT_BATCH(batch, index);
1314     OUT_BATCH(batch, 0);
1315     OUT_BATCH(batch, 0);
1316     OUT_BATCH(batch, 0);
1317     OUT_BATCH(batch, 0);
1318    
1319     /*inline data */
1320     OUT_BATCH(batch, head_offset);
1321     OUT_BATCH(batch, batchbuffer_offset);
1322     OUT_BATCH(batch, 
1323               head_size << 16 |
1324               tail_size);
1325     OUT_BATCH(batch,
1326               number_mb_cmds << 16 |
1327               first_object << 2 |
1328               last_object << 1 |
1329               last_slice);
1330     OUT_BATCH(batch,
1331               mb_y << 8 |
1332               mb_x);
1333     OUT_BATCH(batch,
1334               qp << 16 |
1335               width_in_mbs);
1336
1337     ADVANCE_BATCH(batch);
1338 }
1339
1340 static void
1341 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1342                                        struct intel_encoder_context *encoder_context,
1343                                        VAEncSliceParameterBufferH264 *slice_param,
1344                                        int head_offset,
1345                                        unsigned short head_size,
1346                                        unsigned short tail_size,
1347                                        int batchbuffer_offset,
1348                                        int qp,
1349                                        int last_slice)
1350 {
1351     struct intel_batchbuffer *batch = encoder_context->base.batch;
1352     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1353     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1354     int total_mbs = slice_param->num_macroblocks;
1355     int number_mb_cmds = 128;
1356     int starting_mb = 0;
1357     int last_object = 0;
1358     int first_object = 1;
1359     int i;
1360     int mb_x, mb_y;
1361     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1362
1363     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1364         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1365         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1366         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1367         assert(mb_x <= 255 && mb_y <= 255);
1368
1369         starting_mb += number_mb_cmds;
1370
1371         gen75_mfc_batchbuffer_emit_object_command(batch,
1372                                                  index,
1373                                                  head_offset,
1374                                                  batchbuffer_offset,
1375                                                  head_size,
1376                                                  tail_size,
1377                                                  number_mb_cmds,
1378                                                  first_object,
1379                                                  last_object,
1380                                                  last_slice,
1381                                                  mb_x,
1382                                                  mb_y,
1383                                                  width_in_mbs,
1384                                                  qp);
1385
1386         if (first_object) {
1387             head_offset += head_size;
1388             batchbuffer_offset += head_size;
1389         }
1390
1391         if (last_object) {
1392             head_offset += tail_size;
1393             batchbuffer_offset += tail_size;
1394         }
1395
1396         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1397
1398         first_object = 0;
1399     }
1400
1401     if (!last_object) {
1402         last_object = 1;
1403         number_mb_cmds = total_mbs % number_mb_cmds;
1404         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1405         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1406         assert(mb_x <= 255 && mb_y <= 255);
1407         starting_mb += number_mb_cmds;
1408
1409         gen75_mfc_batchbuffer_emit_object_command(batch,
1410                                                  index,
1411                                                  head_offset,
1412                                                  batchbuffer_offset,
1413                                                  head_size,
1414                                                  tail_size,
1415                                                  number_mb_cmds,
1416                                                  first_object,
1417                                                  last_object,
1418                                                  last_slice,
1419                                                  mb_x,
1420                                                  mb_y,
1421                                                  width_in_mbs,
1422                                                  qp);
1423     }
1424 }
1425                           
1426 /*
1427  * return size in Owords (16bytes)
1428  */         
1429 static int
1430 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1431                                struct encode_state *encode_state,
1432                                struct intel_encoder_context *encoder_context,
1433                                int slice_index,
1434                                int batchbuffer_offset)
1435 {
1436     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1437     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1438     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1439     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1440     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1441     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1442     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1443     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1444     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1445     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1446     unsigned char *slice_header = NULL;
1447     int slice_header_length_in_bits = 0;
1448     unsigned int tail_data[] = { 0x0, 0x0 };
1449     long head_offset;
1450     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1451     unsigned short head_size, tail_size;
1452     int slice_type = pSliceParameter->slice_type;
1453
1454     if (rate_control_mode == VA_RC_CBR) {
1455         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1456         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1457     }
1458
1459     /* only support for 8-bit pixel bit-depth */
1460     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1461     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1462     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1463     assert(qp >= 0 && qp < 52);
1464
1465     head_offset = old_used / 16;
1466     gen75_mfc_avc_slice_state(ctx,
1467                              pPicParameter,
1468                              pSliceParameter,
1469                              encode_state,
1470                              encoder_context,
1471                              (rate_control_mode == VA_RC_CBR),
1472                              qp,
1473                              slice_batch);
1474
1475     if (slice_index == 0)
1476         gen75_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1477
1478     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1479
1480     // slice hander
1481     mfc_context->insert_object(ctx,
1482                                encoder_context,
1483                                (unsigned int *)slice_header,
1484                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1485                                slice_header_length_in_bits & 0x1f,
1486                                5,  /* first 5 bytes are start code + nal unit type */
1487                                1,
1488                                0,
1489                                1,
1490                                slice_batch);
1491     free(slice_header);
1492
1493     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1494     used = intel_batchbuffer_used_size(slice_batch);
1495     head_size = (used - old_used) / 16;
1496     old_used = used;
1497
1498     /* tail */
1499     if (last_slice) {    
1500         mfc_context->insert_object(ctx,
1501                                    encoder_context,
1502                                    tail_data,
1503                                    2,
1504                                    8,
1505                                    2,
1506                                    1,
1507                                    1,
1508                                    0,
1509                                    slice_batch);
1510     } else {
1511         mfc_context->insert_object(ctx,
1512                                    encoder_context,
1513                                    tail_data,
1514                                    1,
1515                                    8,
1516                                    1,
1517                                    1,
1518                                    1,
1519                                    0,
1520                                    slice_batch);
1521     }
1522
1523     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1524     used = intel_batchbuffer_used_size(slice_batch);
1525     tail_size = (used - old_used) / 16;
1526
1527    
1528     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1529                                            encoder_context,
1530                                            pSliceParameter,
1531                                            head_offset,
1532                                            head_size,
1533                                            tail_size,
1534                                            batchbuffer_offset,
1535                                            qp,
1536                                            last_slice);
1537
1538     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1539 }
1540
1541 static void
1542 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1543                                   struct encode_state *encode_state,
1544                                   struct intel_encoder_context *encoder_context)
1545 {
1546     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1547     struct intel_batchbuffer *batch = encoder_context->base.batch;
1548     int i, size, offset = 0;
1549     intel_batchbuffer_start_atomic(batch, 0x4000); 
1550     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1551
1552     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1553         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1554         offset += size;
1555     }
1556
1557     intel_batchbuffer_end_atomic(batch);
1558     intel_batchbuffer_flush(batch);
1559 }
1560
1561 static void
1562 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1563                                struct encode_state *encode_state,
1564                                struct intel_encoder_context *encoder_context)
1565 {
1566     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1567     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1568     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1569     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1570 }
1571
1572 static dri_bo *
1573 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1574                                   struct encode_state *encode_state,
1575                                   struct intel_encoder_context *encoder_context)
1576 {
1577     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1578
1579     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1580     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1581
1582     return mfc_context->mfc_batchbuffer_surface.bo;
1583 }
1584
1585 #endif
1586
1587 static void
1588 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1589                                  struct encode_state *encode_state,
1590                                  struct intel_encoder_context *encoder_context)
1591 {
1592     struct intel_batchbuffer *batch = encoder_context->base.batch;
1593     dri_bo *slice_batch_bo;
1594
1595     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1596         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1597         assert(0);
1598         return; 
1599     }
1600
1601 #if __SOFTWARE__
1602     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1603 #else
1604     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1605 #endif
1606
1607     // begin programing
1608     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1609     intel_batchbuffer_emit_mi_flush(batch);
1610     
1611     // picture level programing
1612     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1613
1614     BEGIN_BCS_BATCH(batch, 2);
1615     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1616     OUT_BCS_RELOC(batch,
1617                   slice_batch_bo,
1618                   I915_GEM_DOMAIN_COMMAND, 0, 
1619                   0);
1620     ADVANCE_BCS_BATCH(batch);
1621
1622     // end programing
1623     intel_batchbuffer_end_atomic(batch);
1624
1625     dri_bo_unreference(slice_batch_bo);
1626 }
1627
1628
1629 static VAStatus
1630 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1631                             struct encode_state *encode_state,
1632                             struct intel_encoder_context *encoder_context)
1633 {
1634     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1635     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1636     int current_frame_bits_size;
1637     int sts;
1638  
1639     for (;;) {
1640         gen75_mfc_init(ctx, encoder_context);
1641         gen75_mfc_avc_prepare(ctx, encode_state, encoder_context);
1642         /*Programing bcs pipeline*/
1643         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1644         gen75_mfc_run(ctx, encode_state, encoder_context);
1645         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1646             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1647             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1648             if (sts == BRC_NO_HRD_VIOLATION) {
1649                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1650                 break;
1651             }
1652             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1653                 if (!mfc_context->hrd.violation_noted) {
1654                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1655                     mfc_context->hrd.violation_noted = 1;
1656                 }
1657                 return VA_STATUS_SUCCESS;
1658             }
1659         } else {
1660             break;
1661         }
1662     }
1663
1664     return VA_STATUS_SUCCESS;
1665 }
1666
1667
1668 static void
1669 gen75_mfc_context_destroy(void *context)
1670 {
1671     struct gen6_mfc_context *mfc_context = context;
1672     int i;
1673
1674     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1675     mfc_context->post_deblocking_output.bo = NULL;
1676
1677     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1678     mfc_context->pre_deblocking_output.bo = NULL;
1679
1680     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1681     mfc_context->uncompressed_picture_source.bo = NULL;
1682
1683     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1684     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1685
1686     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1687         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1688         mfc_context->direct_mv_buffers[i].bo = NULL;
1689     }
1690
1691     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1692     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1693
1694     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1695     mfc_context->macroblock_status_buffer.bo = NULL;
1696
1697     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1698     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1699
1700     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1701     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1702
1703
1704     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1705         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1706         mfc_context->reference_surfaces[i].bo = NULL;  
1707     }
1708
1709     i965_gpe_context_destroy(&mfc_context->gpe_context);
1710
1711     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
1712     mfc_context->mfc_batchbuffer_surface.bo = NULL;
1713
1714     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1715     mfc_context->aux_batchbuffer_surface.bo = NULL;
1716
1717     if (mfc_context->aux_batchbuffer)
1718         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1719
1720     mfc_context->aux_batchbuffer = NULL;
1721
1722     free(mfc_context);
1723 }
1724
1725 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
1726                   VAProfile profile,
1727                   struct encode_state *encode_state,
1728                   struct intel_encoder_context *encoder_context)
1729 {
1730     VAStatus vaStatus;
1731
1732     switch (profile) {
1733     case VAProfileH264Baseline:
1734     case VAProfileH264Main:
1735     case VAProfileH264High:
1736         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1737         break;
1738
1739         /* FIXME: add for other profile */
1740     default:
1741         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1742         break;
1743     }
1744
1745     return vaStatus;
1746 }
1747
1748 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1749 {
1750     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1751
1752     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1753
1754     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1755     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1756
1757     mfc_context->gpe_context.curbe.length = 32 * 4;
1758
1759     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1760     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1761     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1762     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1763     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1764
1765     i965_gpe_load_kernels(ctx,
1766                           &mfc_context->gpe_context,
1767                           gen75_mfc_kernels,
1768                           NUM_MFC_KERNEL);
1769
1770     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
1771     mfc_context->set_surface_state = gen75_mfc_surface_state;
1772     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
1773     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
1774     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
1775     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
1776     mfc_context->insert_object = gen75_mfc_avc_insert_object;
1777     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1778
1779     encoder_context->mfc_context = mfc_context;
1780     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
1781     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
1782     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
1783
1784     return True;
1785 }