Synced gen7 with gen6 for HRD.
[profile/ivi/vaapi-intel-driver.git] / src / gen7_mfc.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *    Xiang, Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "gen7_mfc.h"
42 #include "gen6_vme.h"
43
44 static void
45 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
46                           int standard_select,
47                           struct intel_encoder_context *encoder_context)
48 {
49     struct intel_batchbuffer *batch = encoder_context->base.batch;
50
51     assert(standard_select == MFX_FORMAT_MPEG2 ||
52            standard_select == MFX_FORMAT_AVC);
53
54     BEGIN_BCS_BATCH(batch, 5);
55
56     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
57     OUT_BCS_BATCH(batch,
58                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
59                   (MFD_MODE_VLD << 15) | /* VLD mode */
60                   (0 << 10) | /* disable Stream-Out */
61                   (1 << 9)  | /* Post Deblocking Output */
62                   (0 << 8)  | /* Pre Deblocking Output */
63                   (0 << 5)  | /* not in stitch mode */
64                   (1 << 4)  | /* encoding mode */
65                   (standard_select << 0));  /* standard select: avc or mpeg2 */
66     OUT_BCS_BATCH(batch,
67                   (0 << 7)  | /* expand NOA bus flag */
68                   (0 << 6)  | /* disable slice-level clock gating */
69                   (0 << 5)  | /* disable clock gating for NOA */
70                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
71                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
72                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
73                   (0 << 1)  |
74                   (0 << 0));
75     OUT_BCS_BATCH(batch, 0);
76     OUT_BCS_BATCH(batch, 0);
77
78     ADVANCE_BCS_BATCH(batch);
79 }
80
81 static void
82 gen7_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
83 {
84     struct intel_batchbuffer *batch = encoder_context->base.batch;
85     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
86
87     BEGIN_BCS_BATCH(batch, 6);
88
89     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
90     OUT_BCS_BATCH(batch, 0);
91     OUT_BCS_BATCH(batch,
92                   ((mfc_context->surface_state.height - 1) << 18) |
93                   ((mfc_context->surface_state.width - 1) << 4));
94     OUT_BCS_BATCH(batch,
95                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
96                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
97                   (0 << 22) | /* surface object control state, FIXME??? */
98                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
99                   (0 << 2)  | /* must be 0 for interleave U/V */
100                   (1 << 1)  | /* must be tiled */
101                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
102     OUT_BCS_BATCH(batch,
103                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
104                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
105     OUT_BCS_BATCH(batch, 0);
106
107     ADVANCE_BCS_BATCH(batch);
108 }
109
110 static void
111 gen7_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
112 {
113     struct intel_batchbuffer *batch = encoder_context->base.batch;
114     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
115     int i;
116
117     BEGIN_BCS_BATCH(batch, 24);
118
119     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
120     OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */
121     OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
122                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
123                   0);                                                                                   /* post output addr  */ 
124     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
125                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
126                   0);                                                                                   /* uncompressed data */
127     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
128                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
129                   0);                                                                                   /* StreamOut data*/
130     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
131                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
132                   0);   
133     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
134                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
135                   0);
136
137     /* 7..22 Reference pictures*/
138     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
139         if ( mfc_context->reference_surfaces[i].bo != NULL) {
140             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
141                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
142                           0);                   
143         } else {
144             OUT_BCS_BATCH(batch, 0);
145         }
146     }
147
148     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
149                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
150                   0);                                                                                   /* Macroblock status buffer*/
151
152     ADVANCE_BCS_BATCH(batch);
153 }
154
155 static void
156 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
157 {
158     struct intel_batchbuffer *batch = encoder_context->base.batch;
159     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
160     struct gen6_vme_context *vme_context = encoder_context->vme_context;
161
162     BEGIN_BCS_BATCH(batch, 11);
163
164     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
165     OUT_BCS_BATCH(batch, 0);
166     OUT_BCS_BATCH(batch, 0);
167     /* MFX Indirect MV Object Base Address */
168     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
169     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172     OUT_BCS_BATCH(batch, 0);
173     OUT_BCS_BATCH(batch, 0);
174     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
175     OUT_BCS_RELOC(batch,
176                   mfc_context->mfc_indirect_pak_bse_object.bo,
177                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
178                   0);
179     OUT_BCS_RELOC(batch,
180                   mfc_context->mfc_indirect_pak_bse_object.bo,
181                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
182                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
183
184     ADVANCE_BCS_BATCH(batch);
185 }
186
187 static void
188 gen7_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
189 {
190     struct intel_batchbuffer *batch = encoder_context->base.batch;
191     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
192
193     BEGIN_BCS_BATCH(batch, 4);
194
195     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
196     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
197                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198                   0);
199     OUT_BCS_BATCH(batch, 0);
200     OUT_BCS_BATCH(batch, 0);
201
202     ADVANCE_BCS_BATCH(batch);
203 }
204
205 static void
206 gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
207                        struct intel_encoder_context *encoder_context)
208 {
209     struct intel_batchbuffer *batch = encoder_context->base.batch;
210     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
211     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
212
213     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
214     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
215
216     BEGIN_BCS_BATCH(batch, 16);
217
218     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
219     OUT_BCS_BATCH(batch,
220                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
221     OUT_BCS_BATCH(batch, 
222                   ((height_in_mbs - 1) << 16) | 
223                   ((width_in_mbs - 1) << 0));
224     OUT_BCS_BATCH(batch, 
225                   (0 << 24) |   /* Second Chroma QP Offset */
226                   (0 << 16) |   /* Chroma QP Offset */
227                   (0 << 14) |   /* Max-bit conformance Intra flag */
228                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
229                   (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
230                   (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
231                   (0 << 8)  |   /* FIXME: Image Structure */
232                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
233     OUT_BCS_BATCH(batch,
234                   (0 << 16) |   /* Mininum Frame size */
235                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
236                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
237                   (0 << 13) |   /* CABAC 0 word insertion test enable */
238                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
239                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
240                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
241                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
242                   (0 << 6)  |   /* Only valid for VLD decoding mode */
243                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
244                   (0 << 4)  |   /* Direct 8x8 inference flag */
245                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
246                   (1 << 2)  |   /* Frame MB only flag */
247                   (0 << 1)  |   /* MBAFF mode is in active */
248                   (0 << 0));    /* Field picture flag */
249     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
250     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
251                   (0xBB8 << 16) |       /* InterMbMaxSz */
252                   (0xEE8) );            /* IntraMbMaxSz */
253     OUT_BCS_BATCH(batch, 0);            /* Reserved */
254     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
255     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
256     OUT_BCS_BATCH(batch, 0x8C000000);
257     OUT_BCS_BATCH(batch, 0x00010000);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262
263     ADVANCE_BCS_BATCH(batch);
264 }
265
266 static void
267 gen7_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
268 {
269     struct intel_batchbuffer *batch = encoder_context->base.batch;
270     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
271
272     int i;
273
274     BEGIN_BCS_BATCH(batch, 69);
275
276     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
277
278     /* Reference frames and Current frames */
279     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
280         if (mfc_context->direct_mv_buffers[i].bo != NULL) { 
281             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
282                           I915_GEM_DOMAIN_INSTRUCTION, 0,
283                           0);
284         } else {
285             OUT_BCS_BATCH(batch, 0);
286         }
287     }
288
289     /* POL list */
290     for (i = 0; i < 32; i++) {
291         OUT_BCS_BATCH(batch, i / 2);
292     }
293
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296
297     ADVANCE_BCS_BATCH(batch);
298 }
299
300 static void
301 gen7_mfc_avc_slice_state(VADriverContextP ctx,
302                          VAEncSliceParameterBufferH264 *slice_param,
303                          struct encode_state *encode_state,
304                          struct intel_encoder_context *encoder_context,
305                          int rate_control_enable,
306                          int qp)
307 {
308     struct intel_batchbuffer *batch = encoder_context->base.batch;
309     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
310     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
311     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
312     int beginmb = slice_param->starting_macroblock_address;
313     int endmb = beginmb + slice_param->number_of_mbs;
314     int beginx = beginmb % width_in_mbs;
315     int beginy = beginmb / width_in_mbs;
316     int nextx =  endmb % width_in_mbs;
317     int nexty = endmb / width_in_mbs;
318     int slice_type = slice_param->slice_type;
319     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
320     int bit_rate_control_target, maxQpN, maxQpP;
321     unsigned char correct[6], grow, shrink;
322     int i;
323
324     if (slice_type == SLICE_TYPE_I)
325         bit_rate_control_target = 0;
326     else
327         bit_rate_control_target = 1;
328
329     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
330     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
331
332     for (i = 0; i < 6; i++)
333         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
334
335     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
336         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
337     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
338         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
339
340     BEGIN_BCS_BATCH(batch, 11);;
341
342     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
343     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
344
345     if (slice_type == SLICE_TYPE_I) {
346         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
347     } else {
348         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
349     }
350
351     OUT_BCS_BATCH(batch, 
352                   (slice_param->direct_spatial_mv_pred_flag << 29) |             /*Direct Prediction Type*/
353                   (0 << 24) |                /*Enable deblocking operation*/
354                   (qp << 16) |                  /*Slice Quantization Parameter*/
355                   (0x0202 << 0));
356     OUT_BCS_BATCH(batch, (beginy << 24) |                       /*First MB X&Y , the begin postion of current slice*/
357                          (beginx << 16) |
358                          slice_param->starting_macroblock_address );
359     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
360     OUT_BCS_BATCH(batch, 
361                   (rate_control_enable << 31) |         /*in CBR mode RateControlCounterEnable = enable*/
362                   (1 << 30) |           /*ResetRateControlCounter*/
363                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
364                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
365                   (rate_control_enable << 23) |     /*RC Panic Enable*/                 
366                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
367                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
368                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
369                   (last_slice << 19) |     /*IsLastSlice*/
370                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
371                   (1 << 17) |       /*HeaderPresentFlag*/       
372                   (1 << 16) |       /*SliceData PresentFlag*/
373                   (1 << 15) |       /*TailPresentFlag*/
374                   (1 << 13) |       /*RBSP NAL TYPE*/   
375                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
376     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
377     OUT_BCS_BATCH(batch,
378                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
379                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
380                   (shrink << 8)  |
381                   (grow << 0));   
382     OUT_BCS_BATCH(batch,
383                   (correct[5] << 20) |
384                   (correct[4] << 16) |
385                   (correct[3] << 12) |
386                   (correct[2] << 8) |
387                   (correct[1] << 4) |
388                   (correct[0] << 0));
389     OUT_BCS_BATCH(batch, 0);
390
391     ADVANCE_BCS_BATCH(batch);
392 }
393
394 static void
395 gen7_mfc_qm_state(VADriverContextP ctx,
396                   int qm_type,
397                   unsigned int *qm,
398                   int qm_length,
399                   struct intel_encoder_context *encoder_context)
400 {
401     struct intel_batchbuffer *batch = encoder_context->base.batch;
402     unsigned int qm_buffer[16];
403
404     assert(qm_length <= 16);
405     assert(sizeof(*qm) == 4);
406     memcpy(qm_buffer, qm, qm_length * 4);
407
408     BEGIN_BCS_BATCH(batch, 18);
409     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
410     OUT_BCS_BATCH(batch, qm_type << 0);
411     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
412     ADVANCE_BCS_BATCH(batch);
413 }
414
415 static void
416 gen7_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
417 {
418     unsigned int qm[16] = {
419         0x10101010, 0x10101010, 0x10101010, 0x10101010,
420         0x10101010, 0x10101010, 0x10101010, 0x10101010,
421         0x10101010, 0x10101010, 0x10101010, 0x10101010,
422         0x10101010, 0x10101010, 0x10101010, 0x10101010
423     };
424
425     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
426     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
427     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
428     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
429 }
430
431 static void
432 gen7_mfc_fqm_state(VADriverContextP ctx,
433                    int fqm_type,
434                    unsigned int *fqm,
435                    int fqm_length,
436                    struct intel_encoder_context *encoder_context)
437 {
438     struct intel_batchbuffer *batch = encoder_context->base.batch;
439     unsigned int fqm_buffer[32];
440
441     assert(fqm_length <= 32);
442     assert(sizeof(*fqm) == 4);
443     memcpy(fqm_buffer, fqm, fqm_length * 4);
444
445     BEGIN_BCS_BATCH(batch, 34);
446     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
447     OUT_BCS_BATCH(batch, fqm_type << 0);
448     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
449     ADVANCE_BCS_BATCH(batch);
450 }
451
452 static void
453 gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
454 {
455     unsigned int qm[32] = {
456         0x10001000, 0x10001000, 0x10001000, 0x10001000,
457         0x10001000, 0x10001000, 0x10001000, 0x10001000,
458         0x10001000, 0x10001000, 0x10001000, 0x10001000,
459         0x10001000, 0x10001000, 0x10001000, 0x10001000,
460         0x10001000, 0x10001000, 0x10001000, 0x10001000,
461         0x10001000, 0x10001000, 0x10001000, 0x10001000,
462         0x10001000, 0x10001000, 0x10001000, 0x10001000,
463         0x10001000, 0x10001000, 0x10001000, 0x10001000
464     };
465
466     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
467     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
468     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
469     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
470 }
471
472 static void
473 gen7_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
474 {
475     struct intel_batchbuffer *batch = encoder_context->base.batch;
476     int i;
477
478     BEGIN_BCS_BATCH(batch, 10);
479     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
480     OUT_BCS_BATCH(batch, 0);                  //Select L0
481     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
482
483     for (i = 0; i < 7; i++) {
484         OUT_BCS_BATCH(batch, 0x80808080);
485     }   
486
487     ADVANCE_BCS_BATCH(batch);
488
489     BEGIN_BCS_BATCH(batch, 10);
490     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
491     OUT_BCS_BATCH(batch, 1);                  //Select L1
492     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
493     for(i = 0; i < 7; i++) {
494         OUT_BCS_BATCH(batch, 0x80808080);
495     }   
496     ADVANCE_BCS_BATCH(batch);
497 }
498         
499 static void
500 gen7_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
501                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
502                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag)
503 {
504     struct intel_batchbuffer *batch = encoder_context->base.batch;
505
506     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
507
508     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
509     OUT_BCS_BATCH(batch,
510                   (0 << 16) |   /* always start at offset 0 */
511                   (data_bits_in_last_dw << 8) |
512                   (skip_emul_byte_count << 4) |
513                   (!!emulation_flag << 3) |
514                   ((!!is_last_header) << 2) |
515                   ((!!is_end_of_slice) << 1) |
516                   (0 << 0));    /* FIXME: ??? */
517     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
518
519     ADVANCE_BCS_BATCH(batch);
520 }
521
522 static int
523 gen7_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
524                               struct intel_encoder_context *encoder_context,
525                               unsigned char target_mb_size, unsigned char max_mb_size)
526 {
527     struct intel_batchbuffer *batch = encoder_context->base.batch;
528     int len_in_dwords = 11;
529
530     BEGIN_BCS_BATCH(batch, len_in_dwords);
531
532     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
533     OUT_BCS_BATCH(batch, 0);
534     OUT_BCS_BATCH(batch, 0);
535     OUT_BCS_BATCH(batch, 
536                   (0 << 24) |           /* PackedMvNum, Debug*/
537                   (0 << 20) |           /* No motion vector */
538                   (1 << 19) |           /* CbpDcY */
539                   (1 << 18) |           /* CbpDcU */
540                   (1 << 17) |           /* CbpDcV */
541                   (msg[0] & 0xFFFF) );
542
543     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
544     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
545     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
546
547     /*Stuff for Intra MB*/
548     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
549     OUT_BCS_BATCH(batch, msg[2]);       
550     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
551     
552     /*MaxSizeInWord and TargetSzieInWord*/
553     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
554                   (target_mb_size << 16) );
555
556     ADVANCE_BCS_BATCH(batch);
557
558     return len_in_dwords;
559 }
560
561 static int
562 gen7_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
563                               struct intel_encoder_context *encoder_context,
564                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type)
565 {
566     struct intel_batchbuffer *batch = encoder_context->base.batch;
567     int len_in_dwords = 11;
568
569     BEGIN_BCS_BATCH(batch, len_in_dwords);
570
571     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
572
573     OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
574     OUT_BCS_BATCH(batch, offset);
575
576     OUT_BCS_BATCH(batch, 
577                   (1 << 24) |     /* PackedMvNum, Debug*/
578                   (4 << 20) |     /* 8 MV, SNB don't use it*/
579                   (1 << 19) |     /* CbpDcY */
580                   (1 << 18) |     /* CbpDcU */
581                   (1 << 17) |     /* CbpDcV */
582                   (0 << 15) |     /* Transform8x8Flag = 0*/
583                   (0 << 14) |     /* Frame based*/
584                   (0 << 13) |     /* Inter MB */
585                   (1 << 8)  |     /* MbType = P_L0_16x16 */   
586                   (0 << 7)  |     /* MBZ for frame */
587                   (0 << 6)  |     /* MBZ */
588                   (2 << 4)  |     /* MBZ for inter*/
589                   (0 << 3)  |     /* MBZ */
590                   (0 << 2)  |     /* SkipMbFlag */
591                   (0 << 0));      /* InterMbMode */
592
593     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
594     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
595 #if 0 
596     if ( slice_type == SLICE_TYPE_B) {
597         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
598     } else {
599         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
600     }
601 #else
602     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
603 #endif
604
605
606     /*Stuff for Inter MB*/
607     OUT_BCS_BATCH(batch, 0x0);        
608     OUT_BCS_BATCH(batch, 0x0);    
609     OUT_BCS_BATCH(batch, 0x0);        
610
611     /*MaxSizeInWord and TargetSzieInWord*/
612     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
613                   (target_mb_size << 16) );
614
615     ADVANCE_BCS_BATCH(batch);
616
617     return len_in_dwords;
618 }
619
620 static void
621 gen7_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
622 {
623     struct i965_driver_data *i965 = i965_driver_data(ctx);
624     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
625     dri_bo *bo;
626     int i;
627
628     /*Encode common setup for MFC*/
629     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
630     mfc_context->post_deblocking_output.bo = NULL;
631
632     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
633     mfc_context->pre_deblocking_output.bo = NULL;
634
635     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
636     mfc_context->uncompressed_picture_source.bo = NULL;
637
638     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
639     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
640
641     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
642         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
643         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
644         mfc_context->direct_mv_buffers[i].bo = NULL;
645     }
646
647     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
648         if (mfc_context->reference_surfaces[i].bo != NULL)
649             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
650         mfc_context->reference_surfaces[i].bo = NULL;  
651     }
652
653     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
654     bo = dri_bo_alloc(i965->intel.bufmgr,
655                       "Buffer",
656                       128 * 64,
657                       64);
658     assert(bo);
659     mfc_context->intra_row_store_scratch_buffer.bo = bo;
660
661     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
662     bo = dri_bo_alloc(i965->intel.bufmgr,
663                       "Buffer",
664                       128*128*16,
665                       64);
666     assert(bo);
667     mfc_context->macroblock_status_buffer.bo = bo;
668
669     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
670     bo = dri_bo_alloc(i965->intel.bufmgr,
671                       "Buffer",
672                       49152,  /* 6 * 128 * 64 */
673                       64);
674     assert(bo);
675     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
676
677     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
678     bo = dri_bo_alloc(i965->intel.bufmgr,
679                       "Buffer",
680                       12288, /* 1.5 * 128 * 64 */
681                       0x1000);
682     assert(bo);
683     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
684 }
685
686 static void gen7_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
687                                       struct encode_state *encode_state,
688                                       struct intel_encoder_context *encoder_context)
689 {
690     static int count = 0;
691     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
692     int rate_control_mode = pSequenceParameter->rate_control_method;   
693
694     if (encode_state->packed_header_data[VAEncPackedHeaderSPS]) {
695         VAEncPackedHeaderParameterBuffer *param = NULL;
696         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderSPS]->buffer;
697         unsigned int length_in_bits;
698
699         assert(encode_state->packed_header_param[VAEncPackedHeaderSPS]);
700         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderSPS]->buffer;
701         length_in_bits = param->length_in_bits[0];
702
703         gen7_mfc_avc_insert_object(ctx, 
704                 encoder_context,
705                 header_data,
706                 ALIGN(length_in_bits, 32) >> 5,
707                 length_in_bits & 0x1f,
708                 param->skip_emulation_check_count,
709                 0,
710                 0,
711                 param->insert_emulation_bytes);
712     }
713
714     if (encode_state->packed_header_data[VAEncPackedHeaderPPS]) {
715         VAEncPackedHeaderParameterBuffer *param = NULL;
716         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderPPS]->buffer;
717         unsigned int length_in_bits;
718
719         assert(encode_state->packed_header_param[VAEncPackedHeaderPPS]);
720         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderPPS]->buffer;
721         length_in_bits = param->length_in_bits[0];
722
723         gen7_mfc_avc_insert_object(ctx, 
724                 encoder_context,
725                 header_data,
726                 ALIGN(length_in_bits, 32) >> 5,
727                 length_in_bits & 0x1f,
728                 param->skip_emulation_check_count,
729                 0,
730                 0,
731                 param->insert_emulation_bytes);
732     }
733     
734     if ( (rate_control_mode == 0) && encode_state->packed_header_data[VAEncPackedHeaderSPS]) {       // this is frist AU
735
736         struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
737         unsigned char *sei_data = NULL;
738         int length_in_bits = build_avc_sei_buffering_period(mfc_context->vui_hrd.i_initial_cpb_removal_delay_length, 
739                                                             
740                                                             mfc_context->vui_hrd.i_initial_cpb_removal_delay, 0, &sei_data);
741         gen7_mfc_avc_insert_object(ctx, 
742                 encoder_context,
743                 (unsigned int *)sei_data,
744                 ALIGN(length_in_bits, 32) >> 5,
745                 length_in_bits & 0x1f,
746                 4,   
747                 0,   
748                 0,   
749                 1);  
750         free(sei_data);
751     }    
752
753     // SEI pic_timing header
754     if ( rate_control_mode == 0) {   
755         struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
756         unsigned char *sei_data = NULL;
757         int length_in_bits = build_avc_sei_pic_timing( mfc_context->vui_hrd.i_cpb_removal_delay_length,
758                                                        mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
759                                                        mfc_context->vui_hrd.i_dpb_output_delay_length,
760                                                        0, &sei_data);
761         gen7_mfc_avc_insert_object(ctx, 
762                 encoder_context,
763                 (unsigned int *)sei_data,
764                 ALIGN(length_in_bits, 32) >> 5,
765                 length_in_bits & 0x1f,
766                 4,   
767                 0,   
768                 0,   
769                 1);  
770         free(sei_data);
771     }  
772     
773     count++;
774 }
775
776 static void gen7_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
777                                       struct encode_state *encode_state,
778                                       struct intel_encoder_context *encoder_context)
779 {
780     gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
781     gen7_mfc_surface_state(ctx, encoder_context);
782     gen7_mfc_ind_obj_base_addr_state(ctx, encoder_context);
783     gen7_mfc_pipe_buf_addr_state(ctx, encoder_context);
784     gen7_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
785     gen7_mfc_avc_img_state(ctx, encode_state, encoder_context);
786     gen7_mfc_avc_qm_state(ctx, encoder_context);
787     gen7_mfc_avc_fqm_state(ctx, encoder_context);
788     gen7_mfc_avc_directmode_state(ctx, encoder_context); 
789     gen7_mfc_avc_ref_idx_state(ctx, encoder_context);
790 }
791
792 static void 
793 gen7_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
794                                        struct encode_state *encode_state,
795                                        struct intel_encoder_context *encoder_context,
796                                        int slice_index)
797 {
798     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
799     struct gen6_vme_context *vme_context = encoder_context->vme_context;
800     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
801     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
802     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
803     VAEncH264DecRefPicMarkingBuffer *pDecRefPicMarking = NULL;
804     unsigned int *msg = NULL, offset = 0;
805     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
806     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
807     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
808     int last_slice = (pSliceParameter->starting_macroblock_address + pSliceParameter->number_of_mbs) == (width_in_mbs * height_in_mbs);
809     int i,x,y;
810     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
811     int rate_control_mode = pSequenceParameter->rate_control_method;   
812     unsigned char *slice_header = NULL;
813     int slice_header_length_in_bits = 0;
814     unsigned int tail_data[] = { 0x0, 0x0 };
815
816     gen7_mfc_avc_slice_state(ctx, pSliceParameter,
817                              encode_state, encoder_context,
818                              (rate_control_mode == 0), qp);
819
820     if ( slice_index == 0) 
821         gen7_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context);
822
823     if (encode_state->dec_ref_pic_marking)
824         pDecRefPicMarking = (VAEncH264DecRefPicMarkingBuffer *)encode_state->dec_ref_pic_marking->buffer;
825     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, pDecRefPicMarking, &slice_header);
826
827     // slice hander
828     gen7_mfc_avc_insert_object(ctx, encoder_context,
829             (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
830             5,  /* first 5 bytes are start code + nal unit type */
831             1, 0, 1);
832
833     if ( rate_control_mode == 0) {
834         qp = mfc_context->bit_rate_control_context[1-is_intra].QpPrimeY;
835     }
836
837     if (is_intra) {
838         dri_bo_map(vme_context->vme_output.bo , 1);
839         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
840     }
841    
842     for (i = pSliceParameter->starting_macroblock_address; 
843          i < pSliceParameter->starting_macroblock_address + pSliceParameter->number_of_mbs; i++) {
844         int last_mb = (i == (pSliceParameter->starting_macroblock_address + pSliceParameter->number_of_mbs - 1) );
845         x = i % width_in_mbs;
846         y = i / width_in_mbs;
847
848         if (is_intra) {
849             assert(msg);
850             gen7_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0);
851             msg += 4;
852         } else {
853             gen7_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, encoder_context, 0, 0, pSliceParameter->slice_type);
854             offset += 64;
855         }
856     }
857    
858     if (is_intra)
859         dri_bo_unmap(vme_context->vme_output.bo);
860     if ( last_slice ) {    
861         gen7_mfc_avc_insert_object(ctx, encoder_context,
862                                tail_data, 2, 8,
863                                2, 1, 1, 0);
864     } else {
865         gen7_mfc_avc_insert_object(ctx, encoder_context,
866                                tail_data, 1, 8,
867                                1, 1, 1, 0);
868     }
869
870     free(slice_header);
871
872 }
873
874 static void
875 gen7_mfc_avc_pipeline_programing(VADriverContextP ctx,
876                                  struct encode_state *encode_state,
877                                  struct intel_encoder_context *encoder_context)
878 {
879     struct intel_batchbuffer *batch = encoder_context->base.batch;
880     int i;
881
882     // begin programing
883     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
884     intel_batchbuffer_emit_mi_flush(batch);
885     
886     // picture level programing
887     gen7_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
888
889     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
890         // slice level programing
891         gen7_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i);
892     }
893     
894     // end programing
895     intel_batchbuffer_end_atomic(batch);
896
897     return;
898 }
899
900 static void 
901 gen7_mfc_free_avc_surface(void **data)
902 {
903     struct gen7_mfc_avc_surface_aux *avc_surface = *data;
904
905     if (!avc_surface)
906         return;
907
908     dri_bo_unreference(avc_surface->dmv_top);
909     avc_surface->dmv_top = NULL;
910     dri_bo_unreference(avc_surface->dmv_bottom);
911     avc_surface->dmv_bottom = NULL;
912
913     free(avc_surface);
914     *data = NULL;
915 }
916
917 static void
918 gen7_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
919                                        struct gen7_mfc_context *mfc_context) 
920 {
921     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
922     
923     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
924     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
925     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
926     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
927     int intra_mb_size = inter_mb_size * 5.0;
928     int i;
929     
930     mfc_context->bit_rate_control_context[0].target_mb_size = intra_mb_size;
931     mfc_context->bit_rate_control_context[0].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
932     mfc_context->bit_rate_control_context[1].target_mb_size = inter_mb_size;
933     mfc_context->bit_rate_control_context[1].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
934
935     for(i = 0 ; i < 2; i++) {
936         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
937         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
938         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
939         mfc_context->bit_rate_control_context[i].GrowInit = 6;
940         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
941         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
942         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
943         
944         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
945         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
946         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
947         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
948         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
949         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
950     }
951     
952     mfc_context->bit_rate_control_context[0].TargetSizeInWord = (intra_mb_size + 16)/ 16;
953     mfc_context->bit_rate_control_context[1].TargetSizeInWord = (inter_mb_size + 16)/ 16;
954
955     mfc_context->bit_rate_control_context[0].MaxSizeInWord = mfc_context->bit_rate_control_context[0].TargetSizeInWord * 1.5;
956     mfc_context->bit_rate_control_context[1].MaxSizeInWord = mfc_context->bit_rate_control_context[1].TargetSizeInWord * 1.5;
957 }
958
959 static int
960 gen7_mfc_bit_rate_control_context_update(struct encode_state *encode_state, 
961                                          struct gen7_mfc_context *mfc_context,
962                                          int current_frame_size) 
963 {
964     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
965     int control_index = 1 - (pSliceParameter->slice_type == SLICE_TYPE_I);
966     int oldQp = mfc_context->bit_rate_control_context[control_index].QpPrimeY;
967
968     if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 4.0 ) {
969         mfc_context->bit_rate_control_context[control_index].QpPrimeY += 4;
970     } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 2.0 ) {
971         mfc_context->bit_rate_control_context[control_index].QpPrimeY += 3;
972     } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.50 ) {
973         mfc_context->bit_rate_control_context[control_index].QpPrimeY += 2;
974     } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.20 ) {
975         mfc_context->bit_rate_control_context[control_index].QpPrimeY ++;
976     } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.30 )  {
977         mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 3;
978     } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.50 )  {
979         mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 2;
980     } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.80 )  {
981         mfc_context->bit_rate_control_context[control_index].QpPrimeY --;
982     }
983     
984     if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY > 51)
985         mfc_context->bit_rate_control_context[control_index].QpPrimeY = 51;
986     if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY < 1)
987         mfc_context->bit_rate_control_context[control_index].QpPrimeY = 1;
988  
989     if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY != oldQp)
990         return 0;
991
992     return 1;
993 }
994 static void 
995 gen7_mfc_hrd_context_init(struct encode_state *encode_state, 
996                           struct gen7_mfc_context *mfc_context) 
997 {
998     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
999     int rate_control_mode = pSequenceParameter->rate_control_method;   
1000     int target_bit_rate = pSequenceParameter->bits_per_second;
1001     
1002     // current we only support CBR mode.
1003     if ( rate_control_mode == 0) {
1004         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
1005         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
1006         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
1007         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
1008         mfc_context->vui_hrd.i_frame_number = 0;
1009
1010         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
1011         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
1012         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
1013     }
1014
1015 }
1016
1017 static VAStatus
1018 gen7_mfc_hrd_context_check(struct encode_state *encode_state, 
1019                           struct gen7_mfc_context *mfc_context) 
1020 {
1021     return VA_STATUS_SUCCESS;
1022 }
1023
1024 static void 
1025 gen7_mfc_hrd_context_update(struct encode_state *encode_state, 
1026                           struct gen7_mfc_context *mfc_context) 
1027 {
1028     mfc_context->vui_hrd.i_frame_number++;
1029 }
1030
1031 static VAStatus
1032 gen7_mfc_avc_prepare(VADriverContextP ctx, 
1033                      struct encode_state *encode_state,
1034                      struct intel_encoder_context *encoder_context)
1035 {
1036     struct i965_driver_data *i965 = i965_driver_data(ctx);
1037     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
1038     struct object_surface *obj_surface; 
1039     struct object_buffer *obj_buffer;
1040     struct gen7_mfc_avc_surface_aux* gen7_avc_surface;
1041     dri_bo *bo;
1042     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1043     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1044     int rate_control_mode = pSequenceParameter->rate_control_method;   
1045     VAStatus vaStatus = VA_STATUS_SUCCESS;
1046     int i;
1047
1048     /*Setup all the input&output object*/
1049
1050     /* Setup current frame and current direct mv buffer*/
1051     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
1052     assert(obj_surface);
1053     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1054
1055     if ( obj_surface->private_data == NULL) {
1056         gen7_avc_surface = calloc(sizeof(struct gen7_mfc_avc_surface_aux), 1);
1057         gen7_avc_surface->dmv_top = 
1058             dri_bo_alloc(i965->intel.bufmgr,
1059                          "Buffer",
1060                          68*8192, 
1061                          64);
1062         gen7_avc_surface->dmv_bottom = 
1063             dri_bo_alloc(i965->intel.bufmgr,
1064                          "Buffer",
1065                          68*8192, 
1066                          64);
1067         assert(gen7_avc_surface->dmv_top);
1068         assert(gen7_avc_surface->dmv_bottom);
1069         obj_surface->private_data = (void *)gen7_avc_surface;
1070         obj_surface->free_private_data = (void *)gen7_mfc_free_avc_surface; 
1071     }
1072
1073     gen7_avc_surface = (struct gen7_mfc_avc_surface_aux*) obj_surface->private_data;
1074     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen7_avc_surface->dmv_top;
1075     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen7_avc_surface->dmv_bottom;
1076     dri_bo_reference(gen7_avc_surface->dmv_top);
1077     dri_bo_reference(gen7_avc_surface->dmv_bottom);
1078
1079     mfc_context->post_deblocking_output.bo = obj_surface->bo;
1080     dri_bo_reference(mfc_context->post_deblocking_output.bo);
1081
1082     mfc_context->surface_state.width = obj_surface->orig_width;
1083     mfc_context->surface_state.height = obj_surface->orig_height;
1084     mfc_context->surface_state.w_pitch = obj_surface->width;
1085     mfc_context->surface_state.h_pitch = obj_surface->height;
1086     
1087     /* Setup reference frames and direct mv buffers*/
1088     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
1089         if (pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID) { 
1090             obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
1091             assert(obj_surface);
1092             if (obj_surface->bo != NULL) {
1093                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1094                 dri_bo_reference(obj_surface->bo);
1095             }
1096             /* Check DMV buffer */
1097             if (obj_surface->private_data == NULL) {
1098                 
1099                 gen7_avc_surface = calloc(sizeof(struct gen7_mfc_avc_surface_aux), 1);
1100                 gen7_avc_surface->dmv_top = 
1101                     dri_bo_alloc(i965->intel.bufmgr,
1102                                  "Buffer",
1103                                  68*8192, 
1104                                  64);
1105                 gen7_avc_surface->dmv_bottom = 
1106                     dri_bo_alloc(i965->intel.bufmgr,
1107                                  "Buffer",
1108                                  68*8192, 
1109                                  64);
1110                 assert(gen7_avc_surface->dmv_top);
1111                 assert(gen7_avc_surface->dmv_bottom);
1112                 obj_surface->private_data = gen7_avc_surface;
1113                 obj_surface->free_private_data = gen7_mfc_free_avc_surface; 
1114             }
1115     
1116             gen7_avc_surface = (struct gen7_mfc_avc_surface_aux*) obj_surface->private_data;
1117             /* Setup DMV buffer */
1118             mfc_context->direct_mv_buffers[i*2].bo = gen7_avc_surface->dmv_top;
1119             mfc_context->direct_mv_buffers[i*2+1].bo = gen7_avc_surface->dmv_bottom; 
1120             dri_bo_reference(gen7_avc_surface->dmv_top);
1121             dri_bo_reference(gen7_avc_surface->dmv_bottom);
1122         } else {
1123             break;
1124         }
1125     }
1126         
1127     obj_surface = SURFACE(encoder_context->input_yuv_surface);
1128     assert(obj_surface && obj_surface->bo);
1129     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1130     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
1131
1132     obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
1133     bo = obj_buffer->buffer_store->bo;
1134     assert(bo);
1135     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1136     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
1137     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN (obj_buffer->size_element - 0x1000, 0x1000);
1138     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
1139
1140     /*Programing bit rate control */
1141     if ( mfc_context->bit_rate_control_context[0].MaxSizeInWord == 0 )
1142         gen7_mfc_bit_rate_control_context_init(encode_state, mfc_context);
1143
1144     /*Programing HRD control */
1145     if ( (rate_control_mode == 0) && (mfc_context->vui_hrd.i_cpb_size_value == 0) )
1146         gen7_mfc_hrd_context_init(encode_state, mfc_context);
1147
1148     /*Programing bcs pipeline*/
1149     gen7_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);       //filling the pipeline
1150         
1151     return vaStatus;
1152 }
1153
1154 static VAStatus
1155 gen7_mfc_run(VADriverContextP ctx, 
1156              struct encode_state *encode_state,
1157              struct intel_encoder_context *encoder_context)
1158 {
1159     struct intel_batchbuffer *batch = encoder_context->base.batch;
1160
1161     intel_batchbuffer_flush(batch);             //run the pipeline
1162
1163     return VA_STATUS_SUCCESS;
1164 }
1165
1166 static VAStatus
1167 gen7_mfc_stop(VADriverContextP ctx, 
1168               struct encode_state *encode_state,
1169               struct intel_encoder_context *encoder_context,
1170               int *encoded_bits_size)
1171 {
1172     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
1173     unsigned int *status_mem;
1174     unsigned int buffer_size_bits = 0;
1175     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1176     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1177     int i;
1178
1179     dri_bo_map(mfc_context->macroblock_status_buffer.bo, 1);
1180     status_mem = (unsigned int *)mfc_context->macroblock_status_buffer.bo->virtual;
1181
1182     //Detecting encoder buffer size and bit rate control result
1183     for(i = 0; i < width_in_mbs * height_in_mbs; i++) {
1184         unsigned short current_mb = status_mem[1] >> 16;
1185         buffer_size_bits += current_mb;
1186         status_mem += 4;
1187     }    
1188
1189     dri_bo_unmap(mfc_context->macroblock_status_buffer.bo);
1190
1191     *encoded_bits_size = buffer_size_bits;
1192
1193     return VA_STATUS_SUCCESS;
1194 }
1195
1196 static VAStatus
1197 gen7_mfc_avc_encode_picture(VADriverContextP ctx, 
1198                             struct encode_state *encode_state,
1199                             struct intel_encoder_context *encoder_context)
1200 {
1201     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1202     struct gen7_mfc_context *mfc_context = encoder_context->mfc_context;
1203     int rate_control_mode = pSequenceParameter->rate_control_method;  
1204     int MAX_CBR_INTERATE = 4;
1205     int current_frame_bits_size;
1206     int i;
1207  
1208     for(i = 0; i < MAX_CBR_INTERATE; i++) {
1209         gen7_mfc_init(ctx, encoder_context);
1210         gen7_mfc_avc_prepare(ctx, encode_state, encoder_context);
1211         gen7_mfc_run(ctx, encode_state, encoder_context);
1212         gen7_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1213
1214         if (rate_control_mode == 0) {
1215             //gen7_mfc_hrd_context_check(encode_state, mfc_context);
1216             if (gen7_mfc_bit_rate_control_context_update( encode_state, mfc_context, current_frame_bits_size)) {
1217                 gen7_mfc_hrd_context_update(encode_state, mfc_context);
1218                 break;
1219             }
1220         } else {
1221             break;
1222         }
1223     }
1224
1225     return VA_STATUS_SUCCESS;
1226 }
1227
1228 static VAStatus
1229 gen7_mfc_pipeline(VADriverContextP ctx,
1230                   VAProfile profile,
1231                   struct encode_state *encode_state,
1232                   struct intel_encoder_context *encoder_context)
1233 {
1234     VAStatus vaStatus;
1235
1236     switch (profile) {
1237     case VAProfileH264Baseline:
1238         vaStatus = gen7_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1239         break;
1240
1241         /* FIXME: add for other profile */
1242     default:
1243         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1244         break;
1245     }
1246
1247     return vaStatus;
1248 }
1249
1250 static void
1251 gen7_mfc_context_destroy(void *context)
1252 {
1253     struct gen7_mfc_context *mfc_context = context;
1254     int i;
1255
1256     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1257     mfc_context->post_deblocking_output.bo = NULL;
1258
1259     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1260     mfc_context->pre_deblocking_output.bo = NULL;
1261
1262     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1263     mfc_context->uncompressed_picture_source.bo = NULL;
1264
1265     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1266     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1267
1268     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1269         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1270         mfc_context->direct_mv_buffers[i].bo = NULL;
1271     }
1272
1273     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1274     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1275
1276     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1277     mfc_context->macroblock_status_buffer.bo = NULL;
1278
1279     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1280     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1281
1282     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1283     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1284
1285
1286     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1287         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1288         mfc_context->reference_surfaces[i].bo = NULL;  
1289     }
1290
1291     free(mfc_context);
1292 }
1293
1294 Bool
1295 gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1296 {
1297     encoder_context->mfc_context = calloc(1, sizeof(struct gen7_mfc_context));
1298     encoder_context->mfc_context_destroy = gen7_mfc_context_destroy;
1299     encoder_context->mfc_pipeline = gen7_mfc_pipeline;
1300
1301     return True;
1302 }