e500feb87443e74ff0bcab4d0f83c0d08cabeca2
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/*
 * Clamp the lvalue x into the inclusive range [min, max], in place.
 *
 * The body is wrapped in do { } while (0) so that "BRC_CLIP(...);" forms a
 * single statement and stays correct inside an unbraced if/else. Every
 * argument is parenthesised; note that x, min and max are each evaluated
 * more than once, so they must be free of side effects.
 */
#define BRC_CLIP(x, min, max)                                           \
    do {                                                                \
        (x) = (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)));  \
    } while (0)
50
51 #define BRC_P_B_QP_DIFF 4
52 #define BRC_I_P_QP_DIFF 2
53 #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
54
55 #define BRC_PWEIGHT 0.6  /* weight if P slice with comparison to I slice */
56 #define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
57
58 #define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
59 #define BRC_CY 0.1 /* weight for */
60 #define BRC_CX_UNDERFLOW 5.
61 #define BRC_CX_OVERFLOW -4.
62
63 #define BRC_PI_0_5 1.5707963267948966192313216916398
64
65 #ifndef HAVE_LOG2F
66 #define log2f(x) (logf(x)/(float)M_LN2)
67 #endif
68
69 int intel_avc_enc_slice_type_fixup(int slice_type)
70 {
71     if (slice_type == SLICE_TYPE_SP ||
72         slice_type == SLICE_TYPE_P)
73         slice_type = SLICE_TYPE_P;
74     else if (slice_type == SLICE_TYPE_SI ||
75              slice_type == SLICE_TYPE_I)
76         slice_type = SLICE_TYPE_I;
77     else {
78         if (slice_type != SLICE_TYPE_B)
79             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
80
81         slice_type = SLICE_TYPE_B;
82     }
83
84     return slice_type;
85 }
86
87 static void
88 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
89                                         struct gen6_mfc_context *mfc_context)
90 {
91     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
92     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
93     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
94     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
95     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
96     int intra_mb_size = inter_mb_size * 5.0;
97     int i;
98
99     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
100     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
101     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
102     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
103     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
104     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
105
106     for(i = 0 ; i < 3; i++) {
107         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
108         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
109         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
110         mfc_context->bit_rate_control_context[i].GrowInit = 6;
111         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
112         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
113         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
114         
115         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
116         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
117         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
118         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
119         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
120         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
121     }
122     
123     mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
124     mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
125     mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
126
127     mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
128     mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
129     mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
130 }
131
132 static void intel_mfc_brc_init(struct encode_state *encode_state,
133                                struct intel_encoder_context* encoder_context)
134 {
135     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
136     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
137     VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
138     VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
139     double bitrate = pSequenceParameter->bits_per_second;
140     double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
141     int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
142     int intra_period = pSequenceParameter->intra_period;
143     int ip_period = pSequenceParameter->ip_period;
144     double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
145     double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
146     double bpf;
147
148     if (pSequenceParameter->ip_period) {
149         pnum = (intra_period + ip_period - 1)/ip_period - 1;
150         bnum = intra_period - inum - pnum;
151     }
152
153     mfc_context->brc.mode = encoder_context->rate_control_mode;
154
155     mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
156                                                              (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
157     mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
158     mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
159
160     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
161     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
162     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
163
164     bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
165
166     mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
167     mfc_context->hrd.current_buffer_fullness =
168         (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
169         pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
170     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
171     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
172     mfc_context->hrd.violation_noted = 0;
173
174     if ((bpf > qp51_size) && (bpf < qp1_size)) {
175         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
176     }
177     else if (bpf >= qp1_size)
178         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
179     else if (bpf <= qp51_size)
180         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
181
182     mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
183     mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
184
185     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
186     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
187     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
188 }
189
190 int intel_mfc_update_hrd(struct encode_state *encode_state,
191                          struct gen6_mfc_context *mfc_context,
192                          int frame_bits)
193 {
194     double prev_bf = mfc_context->hrd.current_buffer_fullness;
195
196     mfc_context->hrd.current_buffer_fullness -= frame_bits;
197
198     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
199         mfc_context->hrd.current_buffer_fullness = prev_bf;
200         return BRC_UNDERFLOW;
201     }
202     
203     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
204     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
205         if (mfc_context->brc.mode == VA_RC_VBR)
206             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
207         else {
208             mfc_context->hrd.current_buffer_fullness = prev_bf;
209             return BRC_OVERFLOW;
210         }
211     }
212     return BRC_NO_HRD_VIOLATION;
213 }
214
/*
 * Post-encode bit rate control step: given the size in bits of the frame
 * that was just encoded, update the HRD model and choose the QP to use for
 * the next frame of the same slice type.
 *
 * The slice type is taken from the first slice parameter buffer only
 * (slice_params_ext[0]).
 *
 * Steps:
 *   1. Predict the next frame size from the target size and the error of
 *      this frame, and scale the QP accordingly (rounding error is carried
 *      in brc.qpf_rounding_accumulator).
 *   2. Limit the QP step to +/-BRC_QP_MAX_CHANGE and clip to [1, 51].
 *   3. Update the HRD buffer via intel_mfc_update_hrd().
 *   4. Apply an additional QP delta based on the distance of the buffer
 *      fullness from its target and from the buffer borders.
 *   5. Without an HRD violation, pull the QPs of the other two slice types
 *      towards the new QP; on underflow/overflow force the QP to move by at
 *      least one step in the corrective direction.
 *
 * Returns the HRD status: BRC_NO_HRD_VIOLATION, BRC_UNDERFLOW, BRC_OVERFLOW,
 * BRC_UNDERFLOW_WITH_MAX_QP or BRC_OVERFLOW_WITH_MIN_QP.
 */
int intel_mfc_brc_postpack(struct encode_state *encode_state,
                           struct gen6_mfc_context *mfc_context,
                           int frame_bits)
{
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
    int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    /* QPs of all three slice types as they were on entry. */
    int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
    int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
    int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from target HRD buffer fullness (normalised to [-1, 1])
     *  y - how far we are from HRD buffer borders
     */
    double x, y;
    double frame_size_alpha;

    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;

    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
    /* With a small buffer (capacity < 5) the whole size error is corrected
     * in the next frame; otherwise it is spread over up to 30 frames of this
     * type in the GOP. */
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
        (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* NOTE(review): if target_frame_size is 0, frame_size_next is 0 as well
     * and this becomes 0/0 — confirm callers never reach that state. */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* rounding qpf to qpn loses precision; accumulate the error and
         * compensate once it exceeds one QP step */
        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that the QP prediction does not leave the valid QP range */
    BRC_CLIP(qpn, 1, 51);

    /* checking whether HRD compliance is still met */
    sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);

    /* calculating QP delta as a function of the buffer state */
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    if (x > 0) {
        /* below target: normalise by the target, y is distance to empty */
        x /= mfc_context->hrd.target_buffer_fullness;
        y = mfc_context->hrd.current_buffer_fullness;
    }
    else {
        /* at or above target: normalise by the headroom, y is distance to full */
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that the QP prediction does not leave the valid QP range */
    BRC_CLIP(qpn, 1, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types; note that ">>" on a
         * negative difference is implementation-defined in C */
        if (slicetype == SLICE_TYPE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == SLICE_TYPE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            /* NOTE(review): threshold 2 paired with ">> 2" differs from the
             * other branches (2 with >> 1, 4 with >> 2) — confirm intent. */
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // SLICE_TYPE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        /* the frame was too big: the QP must go up by at least one */
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        /* the frame was too small: the QP must go down by at least one */
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < 1) { // < 0 (?) overflow with minQP
            qpn = 1;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    /* The QP of the current slice type is overwritten last, so it wins over
     * any adjustment made above. */
    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;

    return sts;
}
334
335 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
336                                        struct intel_encoder_context *encoder_context)
337 {
338     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
339     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
340     unsigned int rate_control_mode = encoder_context->rate_control_mode;
341     int target_bit_rate = pSequenceParameter->bits_per_second;
342     
343     // current we only support CBR mode.
344     if (rate_control_mode == VA_RC_CBR) {
345         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
346         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
347         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
348         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
349         mfc_context->vui_hrd.i_frame_number = 0;
350
351         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
352         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
353         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
354     }
355
356 }
357
358 void 
359 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
360                              struct gen6_mfc_context *mfc_context)
361 {
362     mfc_context->vui_hrd.i_frame_number++;
363 }
364
365 int intel_mfc_interlace_check(VADriverContextP ctx,
366                               struct encode_state *encode_state,
367                               struct intel_encoder_context *encoder_context)
368 {
369     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
370     VAEncSliceParameterBufferH264 *pSliceParameter;
371     int i;
372     int mbCount = 0;
373     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
374     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
375   
376     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
377         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
378         mbCount += pSliceParameter->num_macroblocks; 
379     }
380     
381     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
382         return 0;
383
384     return 1;
385 }
386
387 /*
388  * Check whether the parameters related with CBR are updated and decide whether
389  * it needs to reinitialize the configuration related with CBR.
390  * Currently it will check the following parameters:
391  *      bits_per_second
392  *      frame_rate
393  *      gop_configuration(intra_period, ip_period, intra_idr_period)
394  */
395 static bool intel_mfc_brc_updated_check(struct encode_state *encode_state,
396                            struct intel_encoder_context *encoder_context)
397 {
398     unsigned int rate_control_mode = encoder_context->rate_control_mode;
399     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
400     double cur_fps, cur_bitrate;
401     VAEncSequenceParameterBufferH264 *pSequenceParameter;
402
403
404     if (rate_control_mode != VA_RC_CBR) {
405         return false;
406     }
407
408     pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
409
410     cur_bitrate = pSequenceParameter->bits_per_second;
411     cur_fps = (double)pSequenceParameter->time_scale /
412                 (2 * (double)pSequenceParameter->num_units_in_tick);
413
414     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
415         (cur_fps == mfc_context->brc.saved_fps) &&
416         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
417         (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
418         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
419         /* the parameters related with CBR are not updaetd */
420         return false;
421     }
422
423     mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
424     mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
425     mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
426     mfc_context->brc.saved_fps = cur_fps;
427     mfc_context->brc.saved_bps = cur_bitrate;
428     return true;
429 }
430
431 void intel_mfc_brc_prepare(struct encode_state *encode_state,
432                            struct intel_encoder_context *encoder_context)
433 {
434     unsigned int rate_control_mode = encoder_context->rate_control_mode;
435     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
436
437     if (rate_control_mode == VA_RC_CBR) {
438         bool brc_updated;
439         assert(encoder_context->codec != CODEC_MPEG2);
440
441         brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context);
442
443         /*Programing bit rate control */
444         if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
445              brc_updated) {
446             intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
447             intel_mfc_brc_init(encode_state, encoder_context);
448         }
449
450         /*Programing HRD control */
451         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
452             intel_mfc_hrd_context_init(encode_state, encoder_context);    
453     }
454 }
455
/*
 * Compute how many leading bytes of a packed header must be skipped by
 * emulation-prevention insertion.
 *
 * The buffer (bits_length rounded up to 32 bits, in bytes) is scanned for
 * the first 00 00 01 or 00 00 00 01 start code. The result counts every byte
 * before the start code, the start code itself (3 or 4 bytes) and the NAL
 * unit header: 1 byte, plus 3 more for NAL unit types 14, 20 and 21.
 *
 * Returns 0 (after a WARN_ONCE) when no start code is found. A result above
 * HW_MAX_SKIP_LENGTH only produces a warning; the value is still returned.
 */
static int intel_avc_find_skipemulcnt(unsigned char *buf, int bits_length)
{
    int pos;
    int start_code_found = 0;
    int byte_length;
    int extra_zero_byte;
    int nal_unit_type;
    int skip_cnt;

#define NAL_UNIT_TYPE_MASK 0x1f
#define HW_MAX_SKIP_LENGTH 15

    byte_length = ALIGN(bits_length, 32) >> 3;

    /* Locate the first start code; pos ends up on its first zero byte. */
    for (pos = 0; pos < byte_length - 4; pos++) {
        if (buf[pos] != 0 || buf[pos + 1] != 0)
            continue;

        if (buf[pos + 2] == 1 ||
            (buf[pos + 2] == 0 && buf[pos + 3] == 1)) {
            start_code_found = 1;
            break;
        }
    }

    if (!start_code_found) {
        /* warning message is complained. But anyway it will be inserted. */
        WARN_ONCE("Invalid packed header data. "
                   "Can't find the 000001 start_prefix code\n");
        return 0;
    }

    /* A four-byte start code carries one extra zero byte before 00 00 01. */
    extra_zero_byte = (buf[pos + 2] == 1) ? 0 : 1;

    skip_cnt = pos + extra_zero_byte + 3;

    /* the unit header byte is accounted */
    nal_unit_type = buf[skip_cnt] & NAL_UNIT_TYPE_MASK;
    skip_cnt += 1;

    switch (nal_unit_type) {
    case 14:
    case 20:
    case 21:
        /* more unit header bytes are accounted for MVC/SVC */
        skip_cnt += 3;
        break;
    default:
        break;
    }

    if (skip_cnt > HW_MAX_SKIP_LENGTH) {
        WARN_ONCE("Too many leading zeros are padded for packed data. "
                   "It is beyond the HW range.!!!\n");
    }
    return skip_cnt;
}
507
508 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
509                                               struct encode_state *encode_state,
510                                               struct intel_encoder_context *encoder_context,
511                                               struct intel_batchbuffer *slice_batch)
512 {
513     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
514     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
515     unsigned int rate_control_mode = encoder_context->rate_control_mode;
516     unsigned int skip_emul_byte_cnt;
517
518     if (encode_state->packed_header_data[idx]) {
519         VAEncPackedHeaderParameterBuffer *param = NULL;
520         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
521         unsigned int length_in_bits;
522
523         assert(encode_state->packed_header_param[idx]);
524         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
525         length_in_bits = param->bit_length;
526
527         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
528         mfc_context->insert_object(ctx,
529                                    encoder_context,
530                                    header_data,
531                                    ALIGN(length_in_bits, 32) >> 5,
532                                    length_in_bits & 0x1f,
533                                    skip_emul_byte_cnt,
534                                    0,
535                                    0,
536                                    !param->has_emulation_bytes,
537                                    slice_batch);
538     }
539
540     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
541
542     if (encode_state->packed_header_data[idx]) {
543         VAEncPackedHeaderParameterBuffer *param = NULL;
544         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
545         unsigned int length_in_bits;
546
547         assert(encode_state->packed_header_param[idx]);
548         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
549         length_in_bits = param->bit_length;
550
551         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
552
553         mfc_context->insert_object(ctx,
554                                    encoder_context,
555                                    header_data,
556                                    ALIGN(length_in_bits, 32) >> 5,
557                                    length_in_bits & 0x1f,
558                                    skip_emul_byte_cnt,
559                                    0,
560                                    0,
561                                    !param->has_emulation_bytes,
562                                    slice_batch);
563     }
564     
565     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
566
567     if (encode_state->packed_header_data[idx]) {
568         VAEncPackedHeaderParameterBuffer *param = NULL;
569         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
570         unsigned int length_in_bits;
571
572         assert(encode_state->packed_header_param[idx]);
573         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
574         length_in_bits = param->bit_length;
575
576         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
577         mfc_context->insert_object(ctx,
578                                    encoder_context,
579                                    header_data,
580                                    ALIGN(length_in_bits, 32) >> 5,
581                                    length_in_bits & 0x1f,
582                                    skip_emul_byte_cnt,
583                                    0,
584                                    0,
585                                    !param->has_emulation_bytes,
586                                    slice_batch);
587     } else if (rate_control_mode == VA_RC_CBR) {
588         // this is frist AU
589         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
590
591         unsigned char *sei_data = NULL;
592     
593         int length_in_bits = build_avc_sei_buffer_timing(
594             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
595             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
596             0,
597             mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
598             mfc_context->vui_hrd.i_dpb_output_delay_length,
599             0,
600             &sei_data);
601         mfc_context->insert_object(ctx,
602                                    encoder_context,
603                                    (unsigned int *)sei_data,
604                                    ALIGN(length_in_bits, 32) >> 5,
605                                    length_in_bits & 0x1f,
606                                    4,   
607                                    0,   
608                                    0,   
609                                    1,
610                                    slice_batch);  
611         free(sei_data);
612     }
613 }
614
615 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
616                                struct encode_state *encode_state,
617                                struct intel_encoder_context *encoder_context)
618 {
619     struct i965_driver_data *i965 = i965_driver_data(ctx);
620     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
621     struct object_surface *obj_surface; 
622     struct object_buffer *obj_buffer;
623     GenAvcSurface *gen6_avc_surface;
624     dri_bo *bo;
625     VAStatus vaStatus = VA_STATUS_SUCCESS;
626     int i, j, enable_avc_ildb = 0;
627     VAEncSliceParameterBufferH264 *slice_param;
628     struct i965_coded_buffer_segment *coded_buffer_segment;
629     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
630     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
631     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
632
633     if (IS_GEN6(i965->intel.device_info)) {
634         /* On the SNB it should be fixed to 128 for the DMV buffer */
635         width_in_mbs = 128;
636     }
637
638     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
639         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
640         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
641
642         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
643             assert((slice_param->slice_type == SLICE_TYPE_I) ||
644                    (slice_param->slice_type == SLICE_TYPE_SI) ||
645                    (slice_param->slice_type == SLICE_TYPE_P) ||
646                    (slice_param->slice_type == SLICE_TYPE_SP) ||
647                    (slice_param->slice_type == SLICE_TYPE_B));
648
649             if (slice_param->disable_deblocking_filter_idc != 1) {
650                 enable_avc_ildb = 1;
651                 break;
652             }
653
654             slice_param++;
655         }
656     }
657
658     /*Setup all the input&output object*/
659
660     /* Setup current frame and current direct mv buffer*/
661     obj_surface = encode_state->reconstructed_object;
662     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
663
664     if ( obj_surface->private_data == NULL) {
665         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
666         gen6_avc_surface->dmv_top = 
667             dri_bo_alloc(i965->intel.bufmgr,
668                          "Buffer",
669                          68 * width_in_mbs * height_in_mbs, 
670                          64);
671         gen6_avc_surface->dmv_bottom = 
672             dri_bo_alloc(i965->intel.bufmgr,
673                          "Buffer",
674                          68 * width_in_mbs * height_in_mbs, 
675                          64);
676         assert(gen6_avc_surface->dmv_top);
677         assert(gen6_avc_surface->dmv_bottom);
678         obj_surface->private_data = (void *)gen6_avc_surface;
679         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
680     }
681     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
682     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
683     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
684     dri_bo_reference(gen6_avc_surface->dmv_top);
685     dri_bo_reference(gen6_avc_surface->dmv_bottom);
686
687     if (enable_avc_ildb) {
688         mfc_context->post_deblocking_output.bo = obj_surface->bo;
689         dri_bo_reference(mfc_context->post_deblocking_output.bo);
690     } else {
691         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
692         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
693     }
694
695     mfc_context->surface_state.width = obj_surface->orig_width;
696     mfc_context->surface_state.height = obj_surface->orig_height;
697     mfc_context->surface_state.w_pitch = obj_surface->width;
698     mfc_context->surface_state.h_pitch = obj_surface->height;
699     
700     /* Setup reference frames and direct mv buffers*/
701     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
702         obj_surface = encode_state->reference_objects[i];
703         
704         if (obj_surface && obj_surface->bo) {
705             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
706             dri_bo_reference(obj_surface->bo);
707
708             /* Check DMV buffer */
709             if ( obj_surface->private_data == NULL) {
710                 
711                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
712                 gen6_avc_surface->dmv_top = 
713                     dri_bo_alloc(i965->intel.bufmgr,
714                                  "Buffer",
715                                  68 * width_in_mbs * height_in_mbs, 
716                                  64);
717                 gen6_avc_surface->dmv_bottom = 
718                     dri_bo_alloc(i965->intel.bufmgr,
719                                  "Buffer",
720                                  68 * width_in_mbs * height_in_mbs, 
721                                  64);
722                 assert(gen6_avc_surface->dmv_top);
723                 assert(gen6_avc_surface->dmv_bottom);
724                 obj_surface->private_data = gen6_avc_surface;
725                 obj_surface->free_private_data = gen_free_avc_surface; 
726             }
727     
728             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
729             /* Setup DMV buffer */
730             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
731             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
732             dri_bo_reference(gen6_avc_surface->dmv_top);
733             dri_bo_reference(gen6_avc_surface->dmv_bottom);
734         } else {
735             break;
736         }
737     }
738         
739     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
740     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
741
742     obj_buffer = encode_state->coded_buf_object;
743     bo = obj_buffer->buffer_store->bo;
744     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
745     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
746     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
747     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
748     
749     dri_bo_map(bo, 1);
750     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
751     coded_buffer_segment->mapped = 0;
752     coded_buffer_segment->codec = encoder_context->codec;
753     dri_bo_unmap(bo);
754
755     return vaStatus;
756 }
/*
 * The LUT uses a pair of 4-bit units: the (shift, base) structure, where
 *    value = base << shift   (i.e. 2^K * X = value).
 * So it is necessary to convert a cost into the nearest LUT format.
 * The derivation is:
 *    2^K * X = 2^n * (1 + deltaX)
 *    K + log2(X) = n + log2(1 + deltaX)
 *    log2(X) = n - K + log2(1 + deltaX)
 *    As X is in the range of [1, 15]
 *      4 > n - K + log2(1 + deltaX) >= 0
 *      =>    n + log2(1 + deltaX)  >= K > n - 4  + log2(1 + deltaX)
 *    Then we can derive the corresponding K and get the nearest LUT format.
 *
 * value: the cost to encode; values <= 0 yield 0.
 * max:   upper limit, itself in (shift << 4 | base) format; when the decoded
 *        result exceeds the decoded limit, max is returned instead.
 * Returns the encoded (shift << 4) | base value.
 */
int intel_format_lutvalue(int value, int max)
{
    int ret;
    int logvalue, temp1, temp2;

    if (value <= 0)
        return 0;

    /* Exact floor(log2(value)) by bit scan; a float log2f() rounds up for
     * values just below a power of two (e.g. 2^21 - 1).
     */
    logvalue = 0;
    while ((value >> (logvalue + 1)) != 0)
        logvalue++;

    if (logvalue < 4) {
        /* Fits into the 4-bit base with shift 0 */
        ret = value;
    } else {
        /* Unsigned arithmetic keeps the rounding add and the shift free of
         * signed overflow for very large values.
         */
        unsigned int uvalue = (unsigned int)value;
        unsigned int error = uvalue;
        int j;

        /* The shift j == logvalue always gives a base of 1 or 2 with an
         * error below value, so ret is always replaced in the loop.
         */
        ret = -1;
        for (j = logvalue - 4 + 1; j <= logvalue; j++) {
            unsigned int base = (uvalue + (1u << (j - 1)) - 1) >> j;
            unsigned int temp_value, temp_err;

            if (base >= 16)
                continue;

            temp_value = base << j;
            temp_err = (uvalue > temp_value) ? (uvalue - temp_value)
                                             : (temp_value - uvalue);
            /* Strictly smaller: on ties the smallest shift wins */
            if (temp_err < error) {
                error = temp_err;
                ret = (j << 4) | (int)base;
                if (temp_err == 0)
                    break;
            }
        }
    }

    /* Decode both values and clamp to the limit */
    temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
    temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
    if (temp1 > temp2)
        ret = max;
    return ret;
}
812
813
814 #define         QP_MAX                  52
815
816
817 static float intel_lambda_qp(int qp)
818 {
819     float value, lambdaf;
820     value = qp;
821     value = value / 6 - 2;
822     if (value < 0)
823         value = 0;
824     lambdaf = roundf(powf(2, value));
825     return lambdaf;
826 }
827
828
829 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
830                                 struct encode_state *encode_state,
831                                 struct intel_encoder_context *encoder_context)
832 {
833     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
834     struct gen6_vme_context *vme_context = encoder_context->vme_context;
835     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
836     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
837     int qp, m_cost, j, mv_count;
838     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
839     float   lambda, m_costf;
840
841     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
842
843     
844     if (encoder_context->rate_control_mode == VA_RC_CQP)
845         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
846     else
847         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
848   
849     if (vme_state_message == NULL)
850         return;
851  
852     assert(qp <= QP_MAX); 
853     lambda = intel_lambda_qp(qp);
854     if (slice_type == SLICE_TYPE_I) {
855         vme_state_message[MODE_INTRA_16X16] = 0;
856         m_cost = lambda * 4;
857         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
858         m_cost = lambda * 16; 
859         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
860         m_cost = lambda * 3;
861         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
862     } else {
863         m_cost = 0;
864         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
865         for (j = 1; j < 3; j++) {
866             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
867             m_cost = (int)m_costf;
868             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
869         }
870         mv_count = 3;
871         for (j = 4; j <= 64; j *= 2) {
872             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
873             m_cost = (int)m_costf;
874             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
875             mv_count++;
876         }
877
878         if (qp <= 25) {
879             vme_state_message[MODE_INTRA_16X16] = 0x4a;
880             vme_state_message[MODE_INTRA_8X8] = 0x4a;
881             vme_state_message[MODE_INTRA_4X4] = 0x4a;
882             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
883             vme_state_message[MODE_INTER_16X16] = 0x4a;
884             vme_state_message[MODE_INTER_16X8] = 0x4a;
885             vme_state_message[MODE_INTER_8X8] = 0x4a;
886             vme_state_message[MODE_INTER_8X4] = 0x4a;
887             vme_state_message[MODE_INTER_4X4] = 0x4a;
888             vme_state_message[MODE_INTER_BWD] = 0x2a;
889             return;
890         }
891         m_costf = lambda * 10;
892         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
893         m_cost = lambda * 14;
894         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
895         m_cost = lambda * 24; 
896         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
897         m_costf = lambda * 3.5;
898         m_cost = m_costf;
899         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
900         if (slice_type == SLICE_TYPE_P) {
901             m_costf = lambda * 2.5;
902             m_cost = m_costf;
903             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
904             m_costf = lambda * 4;
905             m_cost = m_costf;
906             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
907             m_costf = lambda * 1.5;
908             m_cost = m_costf;
909             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
910             m_costf = lambda * 3;
911             m_cost = m_costf;
912             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
913             m_costf = lambda * 5;
914             m_cost = m_costf;
915             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
916             /* BWD is not used in P-frame */
917             vme_state_message[MODE_INTER_BWD] = 0;
918         } else {
919             m_costf = lambda * 2.5;
920             m_cost = m_costf;
921             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
922             m_costf = lambda * 5.5;
923             m_cost = m_costf;
924             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
925             m_costf = lambda * 3.5;
926             m_cost = m_costf;
927             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
928             m_costf = lambda * 5.0;
929             m_cost = m_costf;
930             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
931             m_costf = lambda * 6.5;
932             m_cost = m_costf;
933             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
934             m_costf = lambda * 1.5;
935             m_cost = m_costf;
936             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
937         }
938     }
939 }
940
941
942 #define         MB_SCOREBOARD_A         (1 << 0)
943 #define         MB_SCOREBOARD_B         (1 << 1)
944 #define         MB_SCOREBOARD_C         (1 << 2)
945 void 
946 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
947 {
948     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
949     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
950     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
951                                                            MB_SCOREBOARD_B |
952                                                            MB_SCOREBOARD_C);
953
954     /* In VME prediction the current mb depends on the neighbour 
955      * A/B/C macroblock. So the left/up/up-right dependency should
956      * be considered.
957      */
958     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
959     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
960     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
961     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
962     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
963     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
964         
965     vme_context->gpe_context.vfe_desc7.dword = 0;
966     return;
967 }
968
/*
 * Check whether the mb at (x_index, y_index) is inside the picture
 * (mb_width x mb_height) and inside the slice that covers the mb indices
 * [first_mb, first_mb + num_mb).
 *
 * Returns 0 when the mb is in bounds and -1 when it is out of bounds.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    if (x_index < 0 || x_index >= mb_width)
        return -1;
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;
    /* first_mb + num_mb is already the first mb of the next slice, so the
     * upper limit is exclusive.
     */
    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
        return -1;
    return 0;
}
983
984 void
985 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
986                                      struct encode_state *encode_state,
987                                      int mb_width, int mb_height,
988                                      int kernel,
989                                      int transform_8x8_mode_flag,
990                                      struct intel_encoder_context *encoder_context)
991 {
992     struct gen6_vme_context *vme_context = encoder_context->vme_context;
993     int mb_row;
994     int s;
995     unsigned int *command_ptr;
996
997 #define         USE_SCOREBOARD          (1 << 21)
998  
999     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1000     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1001
1002     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1003         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1004         int first_mb = pSliceParameter->macroblock_address;
1005         int num_mb = pSliceParameter->num_macroblocks;
1006         unsigned int mb_intra_ub, score_dep;
1007         int x_outer, y_outer, x_inner, y_inner;
1008         int xtemp_outer = 0;
1009
1010         x_outer = first_mb % mb_width;
1011         y_outer = first_mb / mb_width;
1012         mb_row = y_outer;
1013                                  
1014         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1015             x_inner = x_outer;
1016             y_inner = y_outer;
1017             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1018                 mb_intra_ub = 0;
1019                 score_dep = 0;
1020                 if (x_inner != 0) {
1021                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1022                     score_dep |= MB_SCOREBOARD_A; 
1023                 }
1024                 if (y_inner != mb_row) {
1025                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1026                     score_dep |= MB_SCOREBOARD_B;
1027                     if (x_inner != 0)
1028                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1029                     if (x_inner != (mb_width -1)) {
1030                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1031                         score_dep |= MB_SCOREBOARD_C;
1032                     }
1033                 }
1034                                                         
1035                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1036                 *command_ptr++ = kernel;
1037                 *command_ptr++ = USE_SCOREBOARD;
1038                 /* Indirect data */
1039                 *command_ptr++ = 0;
1040                 /* the (X, Y) term of scoreboard */
1041                 *command_ptr++ = ((y_inner << 16) | x_inner);
1042                 *command_ptr++ = score_dep;
1043                 /*inline data */
1044                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1045                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1046                 x_inner -= 2;
1047                 y_inner += 1;
1048             }
1049             x_outer += 1;
1050         }
1051
1052         xtemp_outer = mb_width - 2;
1053         if (xtemp_outer < 0)
1054             xtemp_outer = 0;
1055         x_outer = xtemp_outer;
1056         y_outer = first_mb / mb_width;
1057         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1058             y_inner = y_outer;
1059             x_inner = x_outer;
1060             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1061                 mb_intra_ub = 0;
1062                 score_dep = 0;
1063                 if (x_inner != 0) {
1064                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1065                     score_dep |= MB_SCOREBOARD_A; 
1066                 }
1067                 if (y_inner != mb_row) {
1068                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1069                     score_dep |= MB_SCOREBOARD_B;
1070                     if (x_inner != 0)
1071                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1072
1073                     if (x_inner != (mb_width -1)) {
1074                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1075                         score_dep |= MB_SCOREBOARD_C;
1076                     }
1077                 }
1078
1079                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1080                 *command_ptr++ = kernel;
1081                 *command_ptr++ = USE_SCOREBOARD;
1082                 /* Indirect data */
1083                 *command_ptr++ = 0;
1084                 /* the (X, Y) term of scoreboard */
1085                 *command_ptr++ = ((y_inner << 16) | x_inner);
1086                 *command_ptr++ = score_dep;
1087                 /*inline data */
1088                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1089                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1090
1091                 x_inner -= 2;
1092                 y_inner += 1;
1093             }
1094             x_outer++;
1095             if (x_outer >= mb_width) {
1096                 y_outer += 1;
1097                 x_outer = xtemp_outer;
1098             }           
1099         }
1100     }
1101
1102     *command_ptr++ = 0;
1103     *command_ptr++ = MI_BATCH_BUFFER_END;
1104
1105     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1106 }
1107
1108 static uint8_t
1109 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1110 {
1111     unsigned int is_long_term =
1112         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1113     unsigned int is_top_field =
1114         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1115     unsigned int is_bottom_field =
1116         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1117
1118     return ((is_long_term                         << 6) |
1119             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1120             (frame_store_id                       << 1) |
1121             ((is_top_field ^ 1) & is_bottom_field));
1122 }
1123
1124 void
1125 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1126                             struct encode_state *encode_state,
1127                             struct intel_encoder_context *encoder_context)
1128 {
1129     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1130     struct intel_batchbuffer *batch = encoder_context->base.batch;
1131     int slice_type;
1132     struct object_surface *obj_surface;
1133     unsigned int fref_entry, bref_entry;
1134     int frame_index, i;
1135     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1136
1137     fref_entry = 0x80808080;
1138     bref_entry = 0x80808080;
1139     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1140
1141     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1142         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1143
1144         if (ref_idx_l0 > 3) {
1145             WARN_ONCE("ref_idx_l0 is out of range\n");
1146             ref_idx_l0 = 0;
1147         }
1148
1149         obj_surface = vme_context->used_reference_objects[0];
1150         frame_index = -1;
1151         for (i = 0; i < 16; i++) {
1152             if (obj_surface &&
1153                 obj_surface == encode_state->reference_objects[i]) {
1154                 frame_index = i;
1155                 break;
1156             }
1157         }
1158         if (frame_index == -1) {
1159             WARN_ONCE("RefPicList0 is not found in DPB!\n");
1160         } else {
1161             int ref_idx_l0_shift = ref_idx_l0 * 8;
1162             fref_entry &= ~(0xFF << ref_idx_l0_shift);
1163             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1164         }
1165     }
1166
1167     if (slice_type == SLICE_TYPE_B) {
1168         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1169
1170         if (ref_idx_l1 > 3) {
1171             WARN_ONCE("ref_idx_l1 is out of range\n");
1172             ref_idx_l1 = 0;
1173         }
1174
1175         obj_surface = vme_context->used_reference_objects[1];
1176         frame_index = -1;
1177         for (i = 0; i < 16; i++) {
1178             if (obj_surface &&
1179                 obj_surface == encode_state->reference_objects[i]) {
1180                 frame_index = i;
1181                 break;
1182             }
1183         }
1184         if (frame_index == -1) {
1185             WARN_ONCE("RefPicList1 is not found in DPB!\n");
1186         } else {
1187             int ref_idx_l1_shift = ref_idx_l1 * 8;
1188             bref_entry &= ~(0xFF << ref_idx_l1_shift);
1189             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1190         }
1191     }
1192
1193     BEGIN_BCS_BATCH(batch, 10);
1194     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1195     OUT_BCS_BATCH(batch, 0);                  //Select L0
1196     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1197     for(i = 0; i < 7; i++) {
1198         OUT_BCS_BATCH(batch, 0x80808080);
1199     }
1200     ADVANCE_BCS_BATCH(batch);
1201
1202     BEGIN_BCS_BATCH(batch, 10);
1203     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1204     OUT_BCS_BATCH(batch, 1);                  //Select L1
1205     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1206     for(i = 0; i < 7; i++) {
1207         OUT_BCS_BATCH(batch, 0x80808080);
1208     }
1209     ADVANCE_BCS_BATCH(batch);
1210 }
1211
1212
1213 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1214                                  struct encode_state *encode_state,
1215                                  struct intel_encoder_context *encoder_context)
1216 {
1217     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1218     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1219     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1220     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1221     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1222     uint32_t mv_x, mv_y;
1223     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1224     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1225     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1226
1227     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1228         mv_x = 512;
1229         mv_y = 64;
1230     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1231         mv_x = 1024;
1232         mv_y = 128;
1233     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1234         mv_x = 2048;
1235         mv_y = 128;
1236     } else {
1237         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1238         mv_x = 512;
1239         mv_y = 64;
1240     }
1241
1242     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1243     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1244         int qp, m_cost, j, mv_count;
1245         float   lambda, m_costf;
1246         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1247             encode_state->slice_params_ext[0]->buffer;
1248         qp = slice_param->quantiser_scale_code;
1249         lambda = intel_lambda_qp(qp);
1250         /* No Intra prediction. So it is zero */
1251         vme_state_message[MODE_INTRA_8X8] = 0;
1252         vme_state_message[MODE_INTRA_4X4] = 0;
1253         vme_state_message[MODE_INTER_MV0] = 0;
1254         for (j = 1; j < 3; j++) {
1255             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1256             m_cost = (int)m_costf;
1257             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1258         }
1259         mv_count = 3;
1260         for (j = 4; j <= 64; j *= 2) {
1261             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1262             m_cost = (int)m_costf;
1263             vme_state_message[MODE_INTER_MV0 + mv_count] =
1264                 intel_format_lutvalue(m_cost, 0x6f);
1265             mv_count++;
1266         }
1267         m_cost = lambda;
1268         /* It can only perform the 16x16 search. So mode cost can be ignored for
1269          * the other mode. for example: 16x8/8x8
1270          */
1271         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1272         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1273                         
1274         vme_state_message[MODE_INTER_16X8] = 0;
1275         vme_state_message[MODE_INTER_8X8] = 0;
1276         vme_state_message[MODE_INTER_8X4] = 0;
1277         vme_state_message[MODE_INTER_4X4] = 0;
1278         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1279
1280     }
1281     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1282
1283     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1284         width_in_mbs;
1285 }
1286
1287 void
1288 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1289                                            struct encode_state *encode_state,
1290                                            int mb_width, int mb_height,
1291                                            int kernel,
1292                                            struct intel_encoder_context *encoder_context)
1293 {
1294     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1295     unsigned int *command_ptr;
1296
1297 #define         MPEG2_SCOREBOARD                (1 << 21)
1298
1299     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1300     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1301
1302     {
1303         unsigned int mb_intra_ub, score_dep;
1304         int x_outer, y_outer, x_inner, y_inner;
1305         int xtemp_outer = 0;
1306         int first_mb = 0;
1307         int num_mb = mb_width * mb_height;
1308
1309         x_outer = 0;
1310         y_outer = 0;
1311         
1312                                  
1313         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1314             x_inner = x_outer;
1315             y_inner = y_outer;
1316             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1317                 mb_intra_ub = 0;
1318                 score_dep = 0;
1319                 if (x_inner != 0) {
1320                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1321                     score_dep |= MB_SCOREBOARD_A; 
1322                 }
1323                 if (y_inner != 0) {
1324                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1325                     score_dep |= MB_SCOREBOARD_B;
1326
1327                     if (x_inner != 0)
1328                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1329
1330                     if (x_inner != (mb_width -1)) {
1331                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1332                         score_dep |= MB_SCOREBOARD_C;
1333                     }
1334                 }
1335                                                         
1336                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1337                 *command_ptr++ = kernel;
1338                 *command_ptr++ = MPEG2_SCOREBOARD;
1339                 /* Indirect data */
1340                 *command_ptr++ = 0;
1341                 /* the (X, Y) term of scoreboard */
1342                 *command_ptr++ = ((y_inner << 16) | x_inner);
1343                 *command_ptr++ = score_dep;
1344                 /*inline data */
1345                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1346                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1347                 x_inner -= 2;
1348                 y_inner += 1;
1349             }
1350             x_outer += 1;
1351         }
1352
1353         xtemp_outer = mb_width - 2;
1354         if (xtemp_outer < 0)
1355             xtemp_outer = 0;
1356         x_outer = xtemp_outer;
1357         y_outer = 0;
1358         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1359             y_inner = y_outer;
1360             x_inner = x_outer;
1361             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1362                 mb_intra_ub = 0;
1363                 score_dep = 0;
1364                 if (x_inner != 0) {
1365                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1366                     score_dep |= MB_SCOREBOARD_A; 
1367                 }
1368                 if (y_inner != 0) {
1369                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1370                     score_dep |= MB_SCOREBOARD_B;
1371
1372                     if (x_inner != 0)
1373                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1374
1375                     if (x_inner != (mb_width -1)) {
1376                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1377                         score_dep |= MB_SCOREBOARD_C;
1378                     }
1379                 }
1380
1381                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1382                 *command_ptr++ = kernel;
1383                 *command_ptr++ = MPEG2_SCOREBOARD;
1384                 /* Indirect data */
1385                 *command_ptr++ = 0;
1386                 /* the (X, Y) term of scoreboard */
1387                 *command_ptr++ = ((y_inner << 16) | x_inner);
1388                 *command_ptr++ = score_dep;
1389                 /*inline data */
1390                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1391                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1392
1393                 x_inner -= 2;
1394                 y_inner += 1;
1395             }
1396             x_outer++;
1397             if (x_outer >= mb_width) {
1398                 y_outer += 1;
1399                 x_outer = xtemp_outer;
1400             }           
1401         }
1402     }
1403
1404     *command_ptr++ = 0;
1405     *command_ptr++ = MI_BATCH_BUFFER_END;
1406
1407     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1408     return;
1409 }
1410
1411 static int
1412 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1413                           VAPictureH264 *ref_list,
1414                           int num_pictures,
1415                           int dir)
1416 {
1417     int i, found = -1, min = 0x7FFFFFFF;
1418
1419     for (i = 0; i < num_pictures; i++) {
1420         int tmp;
1421
1422         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1423             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1424             break;
1425
1426         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1427
1428         if (dir)
1429             tmp = -tmp;
1430
1431         if (tmp > 0 && tmp < min) {
1432             min = tmp;
1433             found = i;
1434         }
1435     }
1436
1437     return found;
1438 }
1439
1440 void
1441 intel_avc_vme_reference_state(VADriverContextP ctx,
1442                               struct encode_state *encode_state,
1443                               struct intel_encoder_context *encoder_context,
1444                               int list_index,
1445                               int surface_index,
1446                               void (* vme_source_surface_state)(
1447                                   VADriverContextP ctx,
1448                                   int index,
1449                                   struct object_surface *obj_surface,
1450                                   struct intel_encoder_context *encoder_context))
1451 {
1452     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1453     struct object_surface *obj_surface = NULL;
1454     struct i965_driver_data *i965 = i965_driver_data(ctx);
1455     VASurfaceID ref_surface_id;
1456     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1457     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1458     int max_num_references;
1459     VAPictureH264 *curr_pic;
1460     VAPictureH264 *ref_list;
1461     int ref_idx;
1462
1463     if (list_index == 0) {
1464         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1465         ref_list = slice_param->RefPicList0;
1466     } else {
1467         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1468         ref_list = slice_param->RefPicList1;
1469     }
1470
1471     if (max_num_references == 1) {
1472         if (list_index == 0) {
1473             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1474             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1475         } else {
1476             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1477             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1478         }
1479
1480         if (ref_surface_id != VA_INVALID_SURFACE)
1481             obj_surface = SURFACE(ref_surface_id);
1482
1483         if (!obj_surface ||
1484             !obj_surface->bo) {
1485             obj_surface = encode_state->reference_objects[list_index];
1486             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1487         }
1488
1489         ref_idx = 0;
1490     } else {
1491         curr_pic = &pic_param->CurrPic;
1492
1493         /* select the reference frame in temporal space */
1494         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1495         ref_surface_id = ref_list[ref_idx].picture_id;
1496
1497         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1498             obj_surface = SURFACE(ref_surface_id);
1499
1500         vme_context->used_reference_objects[list_index] = obj_surface;
1501         vme_context->used_references[list_index] = &ref_list[ref_idx];
1502     }
1503
1504     if (obj_surface &&
1505         obj_surface->bo) {
1506         assert(ref_idx >= 0);
1507         vme_context->used_reference_objects[list_index] = obj_surface;
1508         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1509         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1510                                                     ref_idx << 16 |
1511                                                     ref_idx <<  8 |
1512                                                     ref_idx);
1513     } else {
1514         vme_context->used_reference_objects[list_index] = NULL;
1515         vme_context->used_references[list_index] = NULL;
1516         vme_context->ref_index_in_mb[list_index] = 0;
1517     }
1518 }
1519
1520 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1521                                         struct encode_state *encode_state,
1522                                         struct intel_encoder_context *encoder_context,
1523                                         int slice_index,
1524                                         struct intel_batchbuffer *slice_batch)
1525 {
1526     int count, i, start_index;
1527     unsigned int length_in_bits;
1528     VAEncPackedHeaderParameterBuffer *param = NULL;
1529     unsigned int *header_data = NULL;
1530     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1531     int slice_header_index;
1532
1533     if (encode_state->slice_header_index[slice_index] == 0)
1534         slice_header_index = -1;
1535     else
1536         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1537
1538     count = encode_state->slice_rawdata_count[slice_index];
1539     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1540
1541     for (i = 0; i < count; i++) {
1542         unsigned int skip_emul_byte_cnt;
1543
1544         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1545
1546         param = (VAEncPackedHeaderParameterBuffer *)
1547                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1548
1549         /* skip the slice header packed data type as it is lastly inserted */
1550         if (param->type == VAEncPackedHeaderSlice)
1551             continue;
1552
1553         length_in_bits = param->bit_length;
1554
1555         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1556
1557         /* as the slice header is still required, the last header flag is set to
1558          * zero.
1559          */
1560         mfc_context->insert_object(ctx,
1561                                    encoder_context,
1562                                    header_data,
1563                                    ALIGN(length_in_bits, 32) >> 5,
1564                                    length_in_bits & 0x1f,
1565                                    skip_emul_byte_cnt,
1566                                    0,
1567                                    0,
1568                                    !param->has_emulation_bytes,
1569                                    slice_batch);
1570     }
1571
1572     if (slice_header_index == -1) {
1573         unsigned char *slice_header = NULL;
1574         int slice_header_length_in_bits = 0;
1575         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1576         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1577         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1578
1579         /* No slice header data is passed. And the driver needs to generate it */
1580         /* For the Normal H264 */
1581         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1582                                                              pPicParameter,
1583                                                              pSliceParameter,
1584                                                              &slice_header);
1585         mfc_context->insert_object(ctx, encoder_context,
1586                                    (unsigned int *)slice_header,
1587                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1588                                    slice_header_length_in_bits & 0x1f,
1589                                    5,  /* first 5 bytes are start code + nal unit type */
1590                                    1, 0, 1, slice_batch);
1591
1592         free(slice_header);
1593     } else {
1594         unsigned int skip_emul_byte_cnt;
1595
1596         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1597
1598         param = (VAEncPackedHeaderParameterBuffer *)
1599                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1600         length_in_bits = param->bit_length;
1601
1602         /* as the slice header is the last header data for one slice,
1603          * the last header flag is set to one.
1604          */
1605         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1606
1607         mfc_context->insert_object(ctx,
1608                                    encoder_context,
1609                                    header_data,
1610                                    ALIGN(length_in_bits, 32) >> 5,
1611                                    length_in_bits & 0x1f,
1612                                    skip_emul_byte_cnt,
1613                                    1,
1614                                    0,
1615                                    !param->has_emulation_bytes,
1616                                    slice_batch);
1617     }
1618
1619     return;
1620 }