7cf9cc6f55cd57a7fd1c30344fb85f5058a48cb4
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/*
 * Clamp the lvalue x into the inclusive range [min, max], in place.
 *
 * The body is wrapped in do { } while (0) so that "BRC_CLIP(...);" expands to
 * exactly one statement and can be used safely in an unbraced if/else.
 * x, min and max are each evaluated more than once, so the arguments must be
 * free of side effects.
 */
#define BRC_CLIP(x, min, max)                                           \
    do {                                                                \
        (x) = (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)));  \
    } while (0)

/* QP offsets kept between slice types when one type's QP is propagated to the others */
#define BRC_P_B_QP_DIFF 4
#define BRC_I_P_QP_DIFF 2
#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)

#define BRC_PWEIGHT 0.6  /* target size of a P frame relative to an I frame */
#define BRC_BWEIGHT 0.25 /* target size of a B frame relative to an I frame */

#define BRC_QP_MAX_CHANGE 5 /* maximum QP modification applied in one step */

/* NOTE(review): the three constants below are not referenced in the code visible here */
#define BRC_CY 0.1
#define BRC_CX_UNDERFLOW 5.
#define BRC_CX_OVERFLOW -4.

#define BRC_PI_0_5 1.5707963267948966192313216916398 /* pi / 2 */

/* Fallback for platforms whose libm does not provide log2f() */
#ifndef HAVE_LOG2F
#define log2f(x) (logf(x)/(float)M_LN2)
#endif
68
69 int intel_avc_enc_slice_type_fixup(int slice_type)
70 {
71     if (slice_type == SLICE_TYPE_SP ||
72         slice_type == SLICE_TYPE_P)
73         slice_type = SLICE_TYPE_P;
74     else if (slice_type == SLICE_TYPE_SI ||
75              slice_type == SLICE_TYPE_I)
76         slice_type = SLICE_TYPE_I;
77     else {
78         if (slice_type != SLICE_TYPE_B)
79             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
80
81         slice_type = SLICE_TYPE_B;
82     }
83
84     return slice_type;
85 }
86
87 static void
88 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
89                                         struct gen6_mfc_context *mfc_context)
90 {
91     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
92     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
93     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
94     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
95     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
96     int intra_mb_size = inter_mb_size * 5.0;
97     int i;
98
99     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
100     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
101     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
102     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
103     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
104     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
105
106     for(i = 0 ; i < 3; i++) {
107         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
108         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
109         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
110         mfc_context->bit_rate_control_context[i].GrowInit = 6;
111         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
112         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
113         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
114         
115         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
116         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
117         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
118         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
119         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
120         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
121     }
122     
123     mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
124     mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
125     mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
126
127     mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
128     mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
129     mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
130 }
131
132 static void intel_mfc_brc_init(struct encode_state *encode_state,
133                                struct intel_encoder_context* encoder_context)
134 {
135     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
136     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
137     VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
138     VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
139     double bitrate = pSequenceParameter->bits_per_second;
140     double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
141     int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
142     int intra_period = pSequenceParameter->intra_period;
143     int ip_period = pSequenceParameter->ip_period;
144     double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
145     double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
146     double bpf;
147
148     if (pSequenceParameter->ip_period) {
149         pnum = (intra_period + ip_period - 1)/ip_period - 1;
150         bnum = intra_period - inum - pnum;
151     }
152
153     mfc_context->brc.mode = encoder_context->rate_control_mode;
154
155     mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
156                                                              (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
157     mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
158     mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
159
160     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
161     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
162     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
163
164     bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
165
166     mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
167     mfc_context->hrd.current_buffer_fullness =
168         (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
169         pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
170     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
171     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
172     mfc_context->hrd.violation_noted = 0;
173
174     if ((bpf > qp51_size) && (bpf < qp1_size)) {
175         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
176     }
177     else if (bpf >= qp1_size)
178         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
179     else if (bpf <= qp51_size)
180         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
181
182     mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
183     mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
184
185     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
186     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
187     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
188 }
189
190 int intel_mfc_update_hrd(struct encode_state *encode_state,
191                          struct gen6_mfc_context *mfc_context,
192                          int frame_bits)
193 {
194     double prev_bf = mfc_context->hrd.current_buffer_fullness;
195
196     mfc_context->hrd.current_buffer_fullness -= frame_bits;
197
198     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
199         mfc_context->hrd.current_buffer_fullness = prev_bf;
200         return BRC_UNDERFLOW;
201     }
202     
203     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
204     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
205         if (mfc_context->brc.mode == VA_RC_VBR)
206             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
207         else {
208             mfc_context->hrd.current_buffer_fullness = prev_bf;
209             return BRC_OVERFLOW;
210         }
211     }
212     return BRC_NO_HRD_VIOLATION;
213 }
214
215 int intel_mfc_brc_postpack(struct encode_state *encode_state,
216                            struct gen6_mfc_context *mfc_context,
217                            int frame_bits)
218 {
219     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
220     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
221     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
222     int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
223     int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
224     int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
225     int qp; // quantizer of previously encoded slice of current type
226     int qpn; // predicted quantizer for next frame of current type in integer format
227     double qpf; // predicted quantizer for next frame of current type in float format
228     double delta_qp; // QP correction
229     int target_frame_size, frame_size_next;
230     /* Notes:
231      *  x - how far we are from HRD buffer borders
232      *  y - how far we are from target HRD buffer fullness
233      */
234     double x, y;
235     double frame_size_alpha;
236
237     qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
238
239     target_frame_size = mfc_context->brc.target_frame_size[slicetype];
240     if (mfc_context->hrd.buffer_capacity < 5)
241         frame_size_alpha = 0;
242     else
243         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
244     if (frame_size_alpha > 30) frame_size_alpha = 30;
245     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
246         (double)(frame_size_alpha + 1.);
247
248     /* frame_size_next: avoiding negative number and too small value */
249     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
250         frame_size_next = (int)((double)target_frame_size * 0.25);
251
252     qpf = (double)qp * target_frame_size / frame_size_next;
253     qpn = (int)(qpf + 0.5);
254
255     if (qpn == qp) {
256         /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
257         mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
258         if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
259             qpn++;
260             mfc_context->brc.qpf_rounding_accumulator = 0.;
261         } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
262             qpn--;
263             mfc_context->brc.qpf_rounding_accumulator = 0.;
264         }
265     }
266     /* making sure that QP is not changing too fast */
267     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
268     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
269     /* making sure that with QP predictions we did do not leave QPs range */
270     BRC_CLIP(qpn, 1, 51);
271
272     /* checking wthether HRD compliance is still met */
273     sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);
274
275     /* calculating QP delta as some function*/
276     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
277     if (x > 0) {
278         x /= mfc_context->hrd.target_buffer_fullness;
279         y = mfc_context->hrd.current_buffer_fullness;
280     }
281     else {
282         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
283         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
284     }
285     if (y < 0.01) y = 0.01;
286     if (x > 1) x = 1;
287     else if (x < -1) x = -1;
288
289     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
290     qpn = (int)(qpn + delta_qp + 0.5);
291
292     /* making sure that with QP predictions we did do not leave QPs range */
293     BRC_CLIP(qpn, 1, 51);
294
295     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
296         /* correcting QPs of slices of other types */
297         if (slicetype == SLICE_TYPE_P) {
298             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
299                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
300             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
301                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
302         } else if (slicetype == SLICE_TYPE_I) {
303             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
304                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
305             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
306                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
307         } else { // SLICE_TYPE_B
308             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
309                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
310             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
311                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
312         }
313         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
314         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
315         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
316     } else if (sts == BRC_UNDERFLOW) { // underflow
317         if (qpn <= qp) qpn = qp + 1;
318         if (qpn > 51) {
319             qpn = 51;
320             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
321         }
322     } else if (sts == BRC_OVERFLOW) {
323         if (qpn >= qp) qpn = qp - 1;
324         if (qpn < 1) { // < 0 (?) overflow with minQP
325             qpn = 1;
326             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
327         }
328     }
329
330     mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
331
332     return sts;
333 }
334
335 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
336                                        struct intel_encoder_context *encoder_context)
337 {
338     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
339     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
340     unsigned int rate_control_mode = encoder_context->rate_control_mode;
341     int target_bit_rate = pSequenceParameter->bits_per_second;
342     
343     // current we only support CBR mode.
344     if (rate_control_mode == VA_RC_CBR) {
345         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
346         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
347         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
348         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
349         mfc_context->vui_hrd.i_frame_number = 0;
350
351         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
352         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
353         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
354     }
355
356 }
357
358 void 
359 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
360                              struct gen6_mfc_context *mfc_context)
361 {
362     mfc_context->vui_hrd.i_frame_number++;
363 }
364
365 int intel_mfc_interlace_check(VADriverContextP ctx,
366                               struct encode_state *encode_state,
367                               struct intel_encoder_context *encoder_context)
368 {
369     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
370     VAEncSliceParameterBufferH264 *pSliceParameter;
371     int i;
372     int mbCount = 0;
373     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
374     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
375   
376     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
377         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
378         mbCount += pSliceParameter->num_macroblocks; 
379     }
380     
381     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
382         return 0;
383
384     return 1;
385 }
386
387 void intel_mfc_brc_prepare(struct encode_state *encode_state,
388                            struct intel_encoder_context *encoder_context)
389 {
390     unsigned int rate_control_mode = encoder_context->rate_control_mode;
391     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
392
393     if (rate_control_mode == VA_RC_CBR) {
394         assert(encoder_context->codec != CODEC_MPEG2);
395
396         /*Programing bit rate control */
397         if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) {
398             intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
399             intel_mfc_brc_init(encode_state, encoder_context);
400         }
401
402         /*Programing HRD control */
403         if ( mfc_context->vui_hrd.i_cpb_size_value == 0 )
404             intel_mfc_hrd_context_init(encode_state, encoder_context);    
405     }
406 }
407
408 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
409                                               struct encode_state *encode_state,
410                                               struct intel_encoder_context *encoder_context,
411                                               struct intel_batchbuffer *slice_batch)
412 {
413     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
414     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
415     unsigned int rate_control_mode = encoder_context->rate_control_mode;
416
417     if (encode_state->packed_header_data[idx]) {
418         VAEncPackedHeaderParameterBuffer *param = NULL;
419         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
420         unsigned int length_in_bits;
421
422         assert(encode_state->packed_header_param[idx]);
423         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
424         length_in_bits = param->bit_length;
425
426         mfc_context->insert_object(ctx,
427                                    encoder_context,
428                                    header_data,
429                                    ALIGN(length_in_bits, 32) >> 5,
430                                    length_in_bits & 0x1f,
431                                    5,   /* FIXME: check it */
432                                    0,
433                                    0,
434                                    !param->has_emulation_bytes,
435                                    slice_batch);
436     }
437
438     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
439
440     if (encode_state->packed_header_data[idx]) {
441         VAEncPackedHeaderParameterBuffer *param = NULL;
442         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
443         unsigned int length_in_bits;
444
445         assert(encode_state->packed_header_param[idx]);
446         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
447         length_in_bits = param->bit_length;
448
449         mfc_context->insert_object(ctx,
450                                    encoder_context,
451                                    header_data,
452                                    ALIGN(length_in_bits, 32) >> 5,
453                                    length_in_bits & 0x1f,
454                                    5, /* FIXME: check it */
455                                    0,
456                                    0,
457                                    !param->has_emulation_bytes,
458                                    slice_batch);
459     }
460     
461     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
462
463     if (encode_state->packed_header_data[idx]) {
464         VAEncPackedHeaderParameterBuffer *param = NULL;
465         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
466         unsigned int length_in_bits;
467
468         assert(encode_state->packed_header_param[idx]);
469         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
470         length_in_bits = param->bit_length;
471
472         mfc_context->insert_object(ctx,
473                                    encoder_context,
474                                    header_data,
475                                    ALIGN(length_in_bits, 32) >> 5,
476                                    length_in_bits & 0x1f,
477                                    5, /* FIXME: check it */
478                                    0,
479                                    0,
480                                    !param->has_emulation_bytes,
481                                    slice_batch);
482     } else if (rate_control_mode == VA_RC_CBR) {
483         // this is frist AU
484         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
485
486         unsigned char *sei_data = NULL;
487     
488         int length_in_bits = build_avc_sei_buffer_timing(
489             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
490             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
491             0,
492             mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
493             mfc_context->vui_hrd.i_dpb_output_delay_length,
494             0,
495             &sei_data);
496         mfc_context->insert_object(ctx,
497                                    encoder_context,
498                                    (unsigned int *)sei_data,
499                                    ALIGN(length_in_bits, 32) >> 5,
500                                    length_in_bits & 0x1f,
501                                    4,   
502                                    0,   
503                                    0,   
504                                    1,
505                                    slice_batch);  
506         free(sei_data);
507     }
508 }
509
510 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
511                                struct encode_state *encode_state,
512                                struct intel_encoder_context *encoder_context)
513 {
514     struct i965_driver_data *i965 = i965_driver_data(ctx);
515     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
516     struct object_surface *obj_surface; 
517     struct object_buffer *obj_buffer;
518     GenAvcSurface *gen6_avc_surface;
519     dri_bo *bo;
520     VAStatus vaStatus = VA_STATUS_SUCCESS;
521     int i, j, enable_avc_ildb = 0;
522     VAEncSliceParameterBufferH264 *slice_param;
523     struct i965_coded_buffer_segment *coded_buffer_segment;
524     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
525     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
526     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
527
528     if (IS_GEN6(i965->intel.device_id)) {
529         /* On the SNB it should be fixed to 128 for the DMV buffer */
530         width_in_mbs = 128;
531     }
532
533     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
534         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
535         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
536
537         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
538             assert((slice_param->slice_type == SLICE_TYPE_I) ||
539                    (slice_param->slice_type == SLICE_TYPE_SI) ||
540                    (slice_param->slice_type == SLICE_TYPE_P) ||
541                    (slice_param->slice_type == SLICE_TYPE_SP) ||
542                    (slice_param->slice_type == SLICE_TYPE_B));
543
544             if (slice_param->disable_deblocking_filter_idc != 1) {
545                 enable_avc_ildb = 1;
546                 break;
547             }
548
549             slice_param++;
550         }
551     }
552
553     /*Setup all the input&output object*/
554
555     /* Setup current frame and current direct mv buffer*/
556     obj_surface = encode_state->reconstructed_object;
557     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
558
559     if ( obj_surface->private_data == NULL) {
560         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
561         gen6_avc_surface->dmv_top = 
562             dri_bo_alloc(i965->intel.bufmgr,
563                          "Buffer",
564                          68 * width_in_mbs * height_in_mbs, 
565                          64);
566         gen6_avc_surface->dmv_bottom = 
567             dri_bo_alloc(i965->intel.bufmgr,
568                          "Buffer",
569                          68 * width_in_mbs * height_in_mbs, 
570                          64);
571         assert(gen6_avc_surface->dmv_top);
572         assert(gen6_avc_surface->dmv_bottom);
573         obj_surface->private_data = (void *)gen6_avc_surface;
574         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
575     }
576     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
577     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
578     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
579     dri_bo_reference(gen6_avc_surface->dmv_top);
580     dri_bo_reference(gen6_avc_surface->dmv_bottom);
581
582     if (enable_avc_ildb) {
583         mfc_context->post_deblocking_output.bo = obj_surface->bo;
584         dri_bo_reference(mfc_context->post_deblocking_output.bo);
585     } else {
586         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
587         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
588     }
589
590     mfc_context->surface_state.width = obj_surface->orig_width;
591     mfc_context->surface_state.height = obj_surface->orig_height;
592     mfc_context->surface_state.w_pitch = obj_surface->width;
593     mfc_context->surface_state.h_pitch = obj_surface->height;
594     
595     /* Setup reference frames and direct mv buffers*/
596     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
597         obj_surface = encode_state->reference_objects[i];
598         
599         if (obj_surface && obj_surface->bo) {
600             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
601             dri_bo_reference(obj_surface->bo);
602
603             /* Check DMV buffer */
604             if ( obj_surface->private_data == NULL) {
605                 
606                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
607                 gen6_avc_surface->dmv_top = 
608                     dri_bo_alloc(i965->intel.bufmgr,
609                                  "Buffer",
610                                  68 * width_in_mbs * height_in_mbs, 
611                                  64);
612                 gen6_avc_surface->dmv_bottom = 
613                     dri_bo_alloc(i965->intel.bufmgr,
614                                  "Buffer",
615                                  68 * width_in_mbs * height_in_mbs, 
616                                  64);
617                 assert(gen6_avc_surface->dmv_top);
618                 assert(gen6_avc_surface->dmv_bottom);
619                 obj_surface->private_data = gen6_avc_surface;
620                 obj_surface->free_private_data = gen_free_avc_surface; 
621             }
622     
623             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
624             /* Setup DMV buffer */
625             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
626             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
627             dri_bo_reference(gen6_avc_surface->dmv_top);
628             dri_bo_reference(gen6_avc_surface->dmv_bottom);
629         } else {
630             break;
631         }
632     }
633         
634     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
635     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
636
637     obj_buffer = encode_state->coded_buf_object;
638     bo = obj_buffer->buffer_store->bo;
639     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
640     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
641     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
642     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
643     
644     dri_bo_map(bo, 1);
645     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
646     coded_buffer_segment->mapped = 0;
647     coded_buffer_segment->codec = encoder_context->codec;
648     dri_bo_unmap(bo);
649
650     return vaStatus;
651 }
/*
 * Convert a cost into the nearest value representable in the VME LUT format.
 *
 * A LUT entry is a pair of 4-bit fields packed into one byte: the high
 * nibble is a shift K and the low nibble is a base X, and the entry stands
 * for the value
 *      2^K * X
 *
 * Derivation of the candidate shifts for a cost of 2^n * (1 + deltaX):
 *      2^K * X = 2^n * (1 + deltaX)
 *      K + log2(X) = n + log2(1 + deltaX)
 *      log2(X) = n - K + log2(1 + deltaX)
 *    As X is in the range [1, 15]:
 *      4 > n - K + log2(1 + deltaX) >= 0
 *      =>  n + log2(1 + deltaX) >= K > n - 4 + log2(1 + deltaX)
 *    so only the four shifts n-3 .. n have to be tried.
 *
 * value: the cost to encode; values <= 0 encode as 0.
 * max:   the largest LUT code the caller accepts, in the same packed
 *        (shift << 4 | base) format.
 *
 * Returns the packed LUT code whose decoded value is closest to `value`,
 * or `max` when that decoded value exceeds the decoded value of `max` or
 * when the required shift does not fit into the 4-bit shift field.
 */
int intel_format_lutvalue(int value, int max)
{
    int ret;
    int logvalue, shift, max_value;

    if (value <= 0)
        return 0;

    /* floor(log2(value)), computed on integers so that large costs are not
     * disturbed by float rounding. */
    logvalue = 0;
    while ((value >> (logvalue + 1)) != 0)
        logvalue++;

    if (logvalue < 4) {
        /* value < 16: it is its own base with a shift of 0 */
        ret = value;
    } else {
        unsigned int error = (unsigned int)value;
        int j;

        ret = -1;
        /* logvalue >= 4, hence j >= 1 and (j - 1) is a valid shift count */
        for (j = logvalue - 4 + 1; j <= logvalue; j++) {
            /* unsigned arithmetic: the rounding term and base << j may
             * exceed INT_MAX for very large costs */
            unsigned int base = ((unsigned int)value + (1u << (j - 1)) - 1u) >> j;
            unsigned int approx, err;

            if (base >= 16)
                continue;

            approx = base << j;
            err = (approx > (unsigned int)value) ?
                (approx - (unsigned int)value) : ((unsigned int)value - approx);
            if (err < error) {
                error = err;
                ret = (j << 4) | (int)base;
                if (err == 0)
                    break;
            }
        }
    }

    /* Clamp against the caller's maximum.  The shift is taken from the
     * full code rather than from a masked nibble: a shift above 15 cannot
     * be stored in the LUT byte and always denotes a value beyond any
     * encodable maximum. */
    shift = ret >> 4;
    max_value = (max & 0xf) << ((max & 0xf0) >> 4);
    if (shift > 15 || ((ret & 0xf) << shift) > max_value)
        ret = max;
    return ret;
}
707
708
709 #define         QP_MAX                  52
710
711
712 static float intel_lambda_qp(int qp)
713 {
714     float value, lambdaf;
715     value = qp;
716     value = value / 6 - 2;
717     if (value < 0)
718         value = 0;
719     lambdaf = roundf(powf(2, value));
720     return lambdaf;
721 }
722
723
724 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
725                                 struct encode_state *encode_state,
726                                 struct intel_encoder_context *encoder_context)
727 {
728     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
729     struct gen6_vme_context *vme_context = encoder_context->vme_context;
730     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
731     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
732     int qp, m_cost, j, mv_count;
733     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
734     float   lambda, m_costf;
735
736     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
737
738     
739     if (encoder_context->rate_control_mode == VA_RC_CQP)
740         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
741     else
742         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
743   
744     if (vme_state_message == NULL)
745         return;
746  
747     assert(qp <= QP_MAX); 
748     lambda = intel_lambda_qp(qp);
749     if (slice_type == SLICE_TYPE_I) {
750         vme_state_message[MODE_INTRA_16X16] = 0;
751         m_cost = lambda * 4;
752         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
753         m_cost = lambda * 16; 
754         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
755         m_cost = lambda * 3;
756         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
757     } else {
758         m_cost = 0;
759         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
760         for (j = 1; j < 3; j++) {
761             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
762             m_cost = (int)m_costf;
763             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
764         }
765         mv_count = 3;
766         for (j = 4; j <= 64; j *= 2) {
767             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
768             m_cost = (int)m_costf;
769             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
770             mv_count++;
771         }
772
773         if (qp <= 25) {
774             vme_state_message[MODE_INTRA_16X16] = 0x4a;
775             vme_state_message[MODE_INTRA_8X8] = 0x4a;
776             vme_state_message[MODE_INTRA_4X4] = 0x4a;
777             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
778             vme_state_message[MODE_INTER_16X16] = 0x4a;
779             vme_state_message[MODE_INTER_16X8] = 0x4a;
780             vme_state_message[MODE_INTER_8X8] = 0x4a;
781             vme_state_message[MODE_INTER_8X4] = 0x4a;
782             vme_state_message[MODE_INTER_4X4] = 0x4a;
783             vme_state_message[MODE_INTER_BWD] = 0x2a;
784             return;
785         }
786         m_costf = lambda * 10;
787         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
788         m_cost = lambda * 14;
789         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
790         m_cost = lambda * 24; 
791         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
792         m_costf = lambda * 3.5;
793         m_cost = m_costf;
794         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
795         if (slice_type == SLICE_TYPE_P) {
796             m_costf = lambda * 2.5;
797             m_cost = m_costf;
798             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
799             m_costf = lambda * 4;
800             m_cost = m_costf;
801             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
802             m_costf = lambda * 1.5;
803             m_cost = m_costf;
804             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
805             m_costf = lambda * 3;
806             m_cost = m_costf;
807             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
808             m_costf = lambda * 5;
809             m_cost = m_costf;
810             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
811             /* BWD is not used in P-frame */
812             vme_state_message[MODE_INTER_BWD] = 0;
813         } else {
814             m_costf = lambda * 2.5;
815             m_cost = m_costf;
816             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
817             m_costf = lambda * 5.5;
818             m_cost = m_costf;
819             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
820             m_costf = lambda * 3.5;
821             m_cost = m_costf;
822             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
823             m_costf = lambda * 5.0;
824             m_cost = m_costf;
825             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
826             m_costf = lambda * 6.5;
827             m_cost = m_costf;
828             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
829             m_costf = lambda * 1.5;
830             m_cost = m_costf;
831             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
832         }
833     }
834 }
835
836
837 #define         MB_SCOREBOARD_A         (1 << 0)
838 #define         MB_SCOREBOARD_B         (1 << 1)
839 #define         MB_SCOREBOARD_C         (1 << 2)
840 void 
841 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
842 {
843     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
844     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
845     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
846                                                            MB_SCOREBOARD_B |
847                                                            MB_SCOREBOARD_C);
848
849     /* In VME prediction the current mb depends on the neighbour 
850      * A/B/C macroblock. So the left/up/up-right dependency should
851      * be considered.
852      */
853     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
854     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
855     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
856     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
857     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
858     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
859         
860     vme_context->gpe_context.vfe_desc7.dword = 0;
861     return;
862 }
863
/*
 * Check whether the macroblock at (x_index, y_index) belongs to the slice
 * that covers the num_mb macroblocks starting at first_mb, inside a picture
 * of mb_width x mb_height macroblocks.
 *
 * Returns 0 when the macroblock is inside both the picture and the slice,
 * -1 otherwise.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    if (x_index < 0 || x_index >= mb_width)
        return -1;
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;
    /* The slice covers [first_mb, first_mb + num_mb): the macroblock at
     * first_mb + num_mb is already the first one of the next slice. */
    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
        return -1;
    return 0;
}
878
879 void
880 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
881                                      struct encode_state *encode_state,
882                                      int mb_width, int mb_height,
883                                      int kernel,
884                                      int transform_8x8_mode_flag,
885                                      struct intel_encoder_context *encoder_context)
886 {
887     struct gen6_vme_context *vme_context = encoder_context->vme_context;
888     int mb_row;
889     int s;
890     unsigned int *command_ptr;
891
892 #define         USE_SCOREBOARD          (1 << 21)
893  
894     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
895     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
896
897     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
898         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
899         int first_mb = pSliceParameter->macroblock_address;
900         int num_mb = pSliceParameter->num_macroblocks;
901         unsigned int mb_intra_ub, score_dep;
902         int x_outer, y_outer, x_inner, y_inner;
903         int xtemp_outer = 0;
904
905         x_outer = first_mb % mb_width;
906         y_outer = first_mb / mb_width;
907         mb_row = y_outer;
908                                  
909         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
910             x_inner = x_outer;
911             y_inner = y_outer;
912             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
913                 mb_intra_ub = 0;
914                 score_dep = 0;
915                 if (x_inner != 0) {
916                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
917                     score_dep |= MB_SCOREBOARD_A; 
918                 }
919                 if (y_inner != mb_row) {
920                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
921                     score_dep |= MB_SCOREBOARD_B;
922                     if (x_inner != 0)
923                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
924                     if (x_inner != (mb_width -1)) {
925                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
926                         score_dep |= MB_SCOREBOARD_C;
927                     }
928                 }
929                                                         
930                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
931                 *command_ptr++ = kernel;
932                 *command_ptr++ = USE_SCOREBOARD;
933                 /* Indirect data */
934                 *command_ptr++ = 0;
935                 /* the (X, Y) term of scoreboard */
936                 *command_ptr++ = ((y_inner << 16) | x_inner);
937                 *command_ptr++ = score_dep;
938                 /*inline data */
939                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
940                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
941                 x_inner -= 2;
942                 y_inner += 1;
943             }
944             x_outer += 1;
945         }
946
947         xtemp_outer = mb_width - 2;
948         if (xtemp_outer < 0)
949             xtemp_outer = 0;
950         x_outer = xtemp_outer;
951         y_outer = first_mb / mb_width;
952         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
953             y_inner = y_outer;
954             x_inner = x_outer;
955             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
956                 mb_intra_ub = 0;
957                 score_dep = 0;
958                 if (x_inner != 0) {
959                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
960                     score_dep |= MB_SCOREBOARD_A; 
961                 }
962                 if (y_inner != mb_row) {
963                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
964                     score_dep |= MB_SCOREBOARD_B;
965                     if (x_inner != 0)
966                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
967
968                     if (x_inner != (mb_width -1)) {
969                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
970                         score_dep |= MB_SCOREBOARD_C;
971                     }
972                 }
973
974                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
975                 *command_ptr++ = kernel;
976                 *command_ptr++ = USE_SCOREBOARD;
977                 /* Indirect data */
978                 *command_ptr++ = 0;
979                 /* the (X, Y) term of scoreboard */
980                 *command_ptr++ = ((y_inner << 16) | x_inner);
981                 *command_ptr++ = score_dep;
982                 /*inline data */
983                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
984                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
985
986                 x_inner -= 2;
987                 y_inner += 1;
988             }
989             x_outer++;
990             if (x_outer >= mb_width) {
991                 y_outer += 1;
992                 x_outer = xtemp_outer;
993             }           
994         }
995     }
996
997     *command_ptr++ = 0;
998     *command_ptr++ = MI_BATCH_BUFFER_END;
999
1000     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1001 }
1002
1003 static uint8_t
1004 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1005 {
1006     unsigned int is_long_term =
1007         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1008     unsigned int is_top_field =
1009         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1010     unsigned int is_bottom_field =
1011         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1012
1013     return ((is_long_term                         << 6) |
1014             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1015             (frame_store_id                       << 1) |
1016             ((is_top_field ^ 1) & is_bottom_field));
1017 }
1018
1019 void
1020 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1021                             struct encode_state *encode_state,
1022                             struct intel_encoder_context *encoder_context)
1023 {
1024     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1025     struct intel_batchbuffer *batch = encoder_context->base.batch;
1026     int slice_type;
1027     struct object_surface *obj_surface;
1028     unsigned int fref_entry, bref_entry;
1029     int frame_index, i;
1030     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1031
1032     fref_entry = 0x80808080;
1033     bref_entry = 0x80808080;
1034     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1035
1036     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1037         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1038
1039         if (ref_idx_l0 > 3) {
1040             WARN_ONCE("ref_idx_l0 is out of range\n");
1041             ref_idx_l0 = 0;
1042         }
1043
1044         obj_surface = vme_context->used_reference_objects[0];
1045         frame_index = -1;
1046         for (i = 0; i < 16; i++) {
1047             if (obj_surface &&
1048                 obj_surface == encode_state->reference_objects[i]) {
1049                 frame_index = i;
1050                 break;
1051             }
1052         }
1053         if (frame_index == -1) {
1054             WARN_ONCE("RefPicList0 is not found in DPB!\n");
1055         } else {
1056             int ref_idx_l0_shift = ref_idx_l0 * 8;
1057             fref_entry &= ~(0xFF << ref_idx_l0_shift);
1058             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1059         }
1060     }
1061
1062     if (slice_type == SLICE_TYPE_B) {
1063         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1064
1065         if (ref_idx_l1 > 3) {
1066             WARN_ONCE("ref_idx_l1 is out of range\n");
1067             ref_idx_l1 = 0;
1068         }
1069
1070         obj_surface = vme_context->used_reference_objects[1];
1071         frame_index = -1;
1072         for (i = 0; i < 16; i++) {
1073             if (obj_surface &&
1074                 obj_surface == encode_state->reference_objects[i]) {
1075                 frame_index = i;
1076                 break;
1077             }
1078         }
1079         if (frame_index == -1) {
1080             WARN_ONCE("RefPicList1 is not found in DPB!\n");
1081         } else {
1082             int ref_idx_l1_shift = ref_idx_l1 * 8;
1083             bref_entry &= ~(0xFF << ref_idx_l1_shift);
1084             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1085         }
1086     }
1087
1088     BEGIN_BCS_BATCH(batch, 10);
1089     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1090     OUT_BCS_BATCH(batch, 0);                  //Select L0
1091     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1092     for(i = 0; i < 7; i++) {
1093         OUT_BCS_BATCH(batch, 0x80808080);
1094     }
1095     ADVANCE_BCS_BATCH(batch);
1096
1097     BEGIN_BCS_BATCH(batch, 10);
1098     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1099     OUT_BCS_BATCH(batch, 1);                  //Select L1
1100     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1101     for(i = 0; i < 7; i++) {
1102         OUT_BCS_BATCH(batch, 0x80808080);
1103     }
1104     ADVANCE_BCS_BATCH(batch);
1105 }
1106
1107
1108 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1109                                  struct encode_state *encode_state,
1110                                  struct intel_encoder_context *encoder_context)
1111 {
1112     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1113     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1114     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1115     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1116     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1117     uint32_t mv_x, mv_y;
1118     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1119     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1120     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1121
1122     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1123         mv_x = 512;
1124         mv_y = 64;
1125     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1126         mv_x = 1024;
1127         mv_y = 128;
1128     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1129         mv_x = 2048;
1130         mv_y = 128;
1131     } else {
1132         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1133         mv_x = 512;
1134         mv_y = 64;
1135     }
1136
1137     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1138     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1139         int qp, m_cost, j, mv_count;
1140         float   lambda, m_costf;
1141         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1142             encode_state->slice_params_ext[0]->buffer;
1143         qp = slice_param->quantiser_scale_code;
1144         lambda = intel_lambda_qp(qp);
1145         /* No Intra prediction. So it is zero */
1146         vme_state_message[MODE_INTRA_8X8] = 0;
1147         vme_state_message[MODE_INTRA_4X4] = 0;
1148         vme_state_message[MODE_INTER_MV0] = 0;
1149         for (j = 1; j < 3; j++) {
1150             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1151             m_cost = (int)m_costf;
1152             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1153         }
1154         mv_count = 3;
1155         for (j = 4; j <= 64; j *= 2) {
1156             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1157             m_cost = (int)m_costf;
1158             vme_state_message[MODE_INTER_MV0 + mv_count] =
1159                 intel_format_lutvalue(m_cost, 0x6f);
1160             mv_count++;
1161         }
1162         m_cost = lambda;
1163         /* It can only perform the 16x16 search. So mode cost can be ignored for
1164          * the other mode. for example: 16x8/8x8
1165          */
1166         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1167         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1168                         
1169         vme_state_message[MODE_INTER_16X8] = 0;
1170         vme_state_message[MODE_INTER_8X8] = 0;
1171         vme_state_message[MODE_INTER_8X4] = 0;
1172         vme_state_message[MODE_INTER_4X4] = 0;
1173         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1174
1175     }
1176     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1177
1178     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1179         width_in_mbs;
1180 }
1181
1182 void
1183 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1184                                            struct encode_state *encode_state,
1185                                            int mb_width, int mb_height,
1186                                            int kernel,
1187                                            struct intel_encoder_context *encoder_context)
1188 {
1189     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1190     unsigned int *command_ptr;
1191
1192 #define         MPEG2_SCOREBOARD                (1 << 21)
1193
1194     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1195     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1196
1197     {
1198         unsigned int mb_intra_ub, score_dep;
1199         int x_outer, y_outer, x_inner, y_inner;
1200         int xtemp_outer = 0;
1201         int first_mb = 0;
1202         int num_mb = mb_width * mb_height;
1203
1204         x_outer = 0;
1205         y_outer = 0;
1206         
1207                                  
1208         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1209             x_inner = x_outer;
1210             y_inner = y_outer;
1211             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1212                 mb_intra_ub = 0;
1213                 score_dep = 0;
1214                 if (x_inner != 0) {
1215                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1216                     score_dep |= MB_SCOREBOARD_A; 
1217                 }
1218                 if (y_inner != 0) {
1219                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1220                     score_dep |= MB_SCOREBOARD_B;
1221
1222                     if (x_inner != 0)
1223                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1224
1225                     if (x_inner != (mb_width -1)) {
1226                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1227                         score_dep |= MB_SCOREBOARD_C;
1228                     }
1229                 }
1230                                                         
1231                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1232                 *command_ptr++ = kernel;
1233                 *command_ptr++ = MPEG2_SCOREBOARD;
1234                 /* Indirect data */
1235                 *command_ptr++ = 0;
1236                 /* the (X, Y) term of scoreboard */
1237                 *command_ptr++ = ((y_inner << 16) | x_inner);
1238                 *command_ptr++ = score_dep;
1239                 /*inline data */
1240                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1241                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1242                 x_inner -= 2;
1243                 y_inner += 1;
1244             }
1245             x_outer += 1;
1246         }
1247
1248         xtemp_outer = mb_width - 2;
1249         if (xtemp_outer < 0)
1250             xtemp_outer = 0;
1251         x_outer = xtemp_outer;
1252         y_outer = 0;
1253         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1254             y_inner = y_outer;
1255             x_inner = x_outer;
1256             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1257                 mb_intra_ub = 0;
1258                 score_dep = 0;
1259                 if (x_inner != 0) {
1260                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1261                     score_dep |= MB_SCOREBOARD_A; 
1262                 }
1263                 if (y_inner != 0) {
1264                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1265                     score_dep |= MB_SCOREBOARD_B;
1266
1267                     if (x_inner != 0)
1268                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1269
1270                     if (x_inner != (mb_width -1)) {
1271                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1272                         score_dep |= MB_SCOREBOARD_C;
1273                     }
1274                 }
1275
1276                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1277                 *command_ptr++ = kernel;
1278                 *command_ptr++ = MPEG2_SCOREBOARD;
1279                 /* Indirect data */
1280                 *command_ptr++ = 0;
1281                 /* the (X, Y) term of scoreboard */
1282                 *command_ptr++ = ((y_inner << 16) | x_inner);
1283                 *command_ptr++ = score_dep;
1284                 /*inline data */
1285                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1286                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1287
1288                 x_inner -= 2;
1289                 y_inner += 1;
1290             }
1291             x_outer++;
1292             if (x_outer >= mb_width) {
1293                 y_outer += 1;
1294                 x_outer = xtemp_outer;
1295             }           
1296         }
1297     }
1298
1299     *command_ptr++ = 0;
1300     *command_ptr++ = MI_BATCH_BUFFER_END;
1301
1302     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1303     return;
1304 }
1305
1306 static int
1307 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1308                           VAPictureH264 *ref_list,
1309                           int num_pictures,
1310                           int dir)
1311 {
1312     int i, found = -1, min = 0x7FFFFFFF;
1313
1314     for (i = 0; i < num_pictures; i++) {
1315         int tmp;
1316
1317         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1318             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1319             break;
1320
1321         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1322
1323         if (dir)
1324             tmp = -tmp;
1325
1326         if (tmp > 0 && tmp < min) {
1327             min = tmp;
1328             found = i;
1329         }
1330     }
1331
1332     return found;
1333 }
1334
1335 void
1336 intel_avc_vme_reference_state(VADriverContextP ctx,
1337                               struct encode_state *encode_state,
1338                               struct intel_encoder_context *encoder_context,
1339                               int list_index,
1340                               int surface_index,
1341                               void (* vme_source_surface_state)(
1342                                   VADriverContextP ctx,
1343                                   int index,
1344                                   struct object_surface *obj_surface,
1345                                   struct intel_encoder_context *encoder_context))
1346 {
1347     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1348     struct object_surface *obj_surface = NULL;
1349     struct i965_driver_data *i965 = i965_driver_data(ctx);
1350     VASurfaceID ref_surface_id;
1351     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1352     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1353     int max_num_references;
1354     VAPictureH264 *curr_pic;
1355     VAPictureH264 *ref_list;
1356     int ref_idx;
1357
1358     if (list_index == 0) {
1359         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1360         ref_list = slice_param->RefPicList0;
1361     } else {
1362         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1363         ref_list = slice_param->RefPicList1;
1364     }
1365
1366     if (max_num_references == 1) {
1367         if (list_index == 0) {
1368             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1369             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1370         } else {
1371             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1372             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1373         }
1374
1375         if (ref_surface_id != VA_INVALID_SURFACE)
1376             obj_surface = SURFACE(ref_surface_id);
1377
1378         if (!obj_surface ||
1379             !obj_surface->bo) {
1380             obj_surface = encode_state->reference_objects[list_index];
1381             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1382         }
1383
1384         ref_idx = 0;
1385     } else {
1386         curr_pic = &pic_param->CurrPic;
1387
1388         /* select the reference frame in temporal space */
1389         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1390         ref_surface_id = ref_list[ref_idx].picture_id;
1391
1392         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1393             obj_surface = SURFACE(ref_surface_id);
1394
1395         vme_context->used_reference_objects[list_index] = obj_surface;
1396         vme_context->used_references[list_index] = &ref_list[ref_idx];
1397     }
1398
1399     if (obj_surface &&
1400         obj_surface->bo) {
1401         assert(ref_idx >= 0);
1402         vme_context->used_reference_objects[list_index] = obj_surface;
1403         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1404         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1405                                                     ref_idx << 16 |
1406                                                     ref_idx <<  8 |
1407                                                     ref_idx);
1408     } else {
1409         vme_context->used_reference_objects[list_index] = NULL;
1410         vme_context->used_references[list_index] = NULL;
1411         vme_context->ref_index_in_mb[list_index] = 0;
1412     }
1413 }