+ } else if (rate_control_mode == VA_RC_CBR) {
+ // this is the first AU
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ unsigned char *sei_data = NULL;
+
+ int length_in_bits = build_avc_sei_buffer_timing(
+ mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
+ mfc_context->vui_hrd.i_initial_cpb_removal_delay,
+ 0,
+ mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
+ mfc_context->vui_hrd.i_dpb_output_delay_length,
+ 0,
+ &sei_data);
+ mfc_context->insert_object(ctx,
+ encoder_context,
+ (unsigned int *)sei_data,
+ ALIGN(length_in_bits, 32) >> 5,
+ length_in_bits & 0x1f,
+ 4,
+ 0,
+ 0,
+ 1,
+ slice_batch);
+ free(sei_data);
+ }
+}
+
+/*
+ * Return the GenAvcSurface attached to obj_surface, creating it together
+ * with its top/bottom direct-MV buffer objects on first use.
+ *
+ * Returns NULL if the GenAvcSurface itself cannot be allocated.  Failures
+ * of dri_bo_alloc() are still caught by assert(), as before.
+ */
+static GenAvcSurface *
+intel_mfc_avc_ensure_dmv_surface(struct i965_driver_data *i965,
+                                 struct object_surface *obj_surface,
+                                 int width_in_mbs,
+                                 int height_in_mbs)
+{
+    GenAvcSurface *avc_surface = obj_surface->private_data;
+
+    if (avc_surface != NULL)
+        return avc_surface;
+
+    avc_surface = calloc(1, sizeof(*avc_surface));
+    if (avc_surface == NULL)
+        return NULL;
+
+    avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
+                                        "Buffer",
+                                        68 * width_in_mbs * height_in_mbs,
+                                        64);
+    avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
+                                           "Buffer",
+                                           68 * width_in_mbs * height_in_mbs,
+                                           64);
+    assert(avc_surface->dmv_top);
+    assert(avc_surface->dmv_bottom);
+
+    /* The surface owns the DMV buffers from now on. */
+    obj_surface->private_data = avc_surface;
+    obj_surface->free_private_data = gen_free_avc_surface;
+
+    return avc_surface;
+}
+
+/*
+ * Bind the buffer objects used for encoding one AVC frame into mfc_context:
+ * the reconstructed surface (as pre- or post-deblocking output), the
+ * reference surfaces, the direct-MV buffers of each of them, the source
+ * picture and the coded buffer.  A reference is taken on every bo stored.
+ *
+ * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_ALLOCATION_FAILED when the
+ * per-surface GenAvcSurface cannot be allocated.
+ * NOTE(review): on the error path, references already stored in mfc_context
+ * stay there; presumably they are dropped by the context's normal
+ * re-init/teardown -- confirm against the caller.
+ */
+VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    struct object_surface *obj_surface;
+    struct object_buffer *obj_buffer;
+    GenAvcSurface *gen6_avc_surface;
+    dri_bo *bo;
+    int i, j, enable_avc_ildb = 0;
+    VAEncSliceParameterBufferH264 *slice_param;
+    struct i965_coded_buffer_segment *coded_buffer_segment;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+
+    if (IS_GEN6(i965->intel.device_info)) {
+        /* On the SNB it should be fixed to 128 for the DMV buffer */
+        width_in_mbs = 128;
+    }
+
+    /* The in-loop deblocking filter is needed as soon as one slice enables it. */
+    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
+        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
+        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
+
+        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
+            assert((slice_param->slice_type == SLICE_TYPE_I) ||
+                   (slice_param->slice_type == SLICE_TYPE_SI) ||
+                   (slice_param->slice_type == SLICE_TYPE_P) ||
+                   (slice_param->slice_type == SLICE_TYPE_SP) ||
+                   (slice_param->slice_type == SLICE_TYPE_B));
+
+            if (slice_param->disable_deblocking_filter_idc != 1) {
+                enable_avc_ildb = 1;
+                break;
+            }
+
+            slice_param++;
+        }
+    }
+
+    /* Setup all the input & output objects */
+
+    /* Setup current frame and current direct mv buffer */
+    obj_surface = encode_state->reconstructed_object;
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
+
+    gen6_avc_surface = intel_mfc_avc_ensure_dmv_surface(i965, obj_surface,
+                                                        width_in_mbs, height_in_mbs);
+    if (gen6_avc_surface == NULL)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+    /* The last two DMV slots belong to the frame being encoded. */
+    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
+    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
+    dri_bo_reference(gen6_avc_surface->dmv_top);
+    dri_bo_reference(gen6_avc_surface->dmv_bottom);
+
+    if (enable_avc_ildb) {
+        mfc_context->post_deblocking_output.bo = obj_surface->bo;
+        dri_bo_reference(mfc_context->post_deblocking_output.bo);
+    } else {
+        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
+        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
+    }
+
+    mfc_context->surface_state.width = obj_surface->orig_width;
+    mfc_context->surface_state.height = obj_surface->orig_height;
+    mfc_context->surface_state.w_pitch = obj_surface->width;
+    mfc_context->surface_state.h_pitch = obj_surface->height;
+
+    /* Setup reference frames and direct mv buffers */
+    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
+        obj_surface = encode_state->reference_objects[i];
+
+        /* The list is dense: stop at the first unused entry. */
+        if (!obj_surface || !obj_surface->bo)
+            break;
+
+        /* Check DMV buffer before any reference is taken for this entry */
+        gen6_avc_surface = intel_mfc_avc_ensure_dmv_surface(i965, obj_surface,
+                                                            width_in_mbs, height_in_mbs);
+        if (gen6_avc_surface == NULL)
+            return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+        mfc_context->reference_surfaces[i].bo = obj_surface->bo;
+        dri_bo_reference(obj_surface->bo);
+
+        /* Setup DMV buffer */
+        mfc_context->direct_mv_buffers[i * 2].bo = gen6_avc_surface->dmv_top;
+        mfc_context->direct_mv_buffers[i * 2 + 1].bo = gen6_avc_surface->dmv_bottom;
+        dri_bo_reference(gen6_avc_surface->dmv_top);
+        dri_bo_reference(gen6_avc_surface->dmv_bottom);
+    }
+
+    mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
+    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
+
+    obj_buffer = encode_state->coded_buf_object;
+    bo = obj_buffer->buffer_store->bo;
+    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
+    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
+    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
+    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
+
+    /* Reset the header segment at the start of the coded buffer. */
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
+    coded_buffer_segment->mapped = 0;
+    coded_buffer_segment->codec = encoder_context->codec;
+    dri_bo_unmap(bo);
+
+    return VA_STATUS_SUCCESS;
+}
+/*
+ * The LUT stores a cost in one byte as a pair of 4-bit fields
+ * (shift, base) -- shift in the high nibble, base in the low nibble --
+ * which decodes to:  value = base << shift  (i.e. 2^K * X).
+ * So an arbitrary cost has to be converted into the nearest LUT format.
+ * The derivation is:
+ *    2^K * X = 2^n * (1 + deltaX)
+ *    K + log2(X) = n + log2(1 + deltaX)
+ *    log2(X) = n - K + log2(1 + deltaX)
+ * As X is in the range of [1, 15]:
+ *    4 > n - K + log2(1 + deltaX) >= 0
+ *    =>  n + log2(1 + deltaX) >= K > n - 4 + log2(1 + deltaX)
+ * Only the four shifts K in (n - 4, n] therefore need to be tried to find
+ * the nearest LUT format.
+ *
+ * value: the cost to encode.
+ * max:   an already encoded LUT byte used as the upper clamp.
+ *
+ * Returns the encoded byte; 0 for value <= 0, value itself when it is
+ * below 16, and max whenever the encoded result would decode to more than
+ * max does.
+ */
+int intel_format_lutvalue(int value, int max)
+{
+    int ret;
+    int logvalue, temp1, temp2;
+
+    if (value <= 0)
+        return 0;
+
+    /*
+     * 0xf << 0xf is the largest value any (shift, base) byte can decode to,
+     * hence also the largest possible clamp.  Anything above it must clamp;
+     * returning here also keeps the shift field within 4 bits and the
+     * arithmetic below free of signed overflow.
+     */
+    if (value > (0xf << 0xf))
+        return max;
+
+    /* Exact integer floor(log2(value)). */
+    logvalue = 0;
+    for (temp1 = value; temp1 > 1; temp1 >>= 1)
+        logvalue++;
+
+    if (logvalue < 4) {
+        /* Fits in the base nibble with shift 0. */
+        ret = value;
+    } else {
+        int error = value;
+        int j;
+
+        ret = -1;
+        for (j = logvalue - 3; j <= logvalue; j++) {
+            /* j >= 1 here; round value / 2^j (ties go down). */
+            int base = (value + (1 << (j - 1)) - 1) >> j;
+            int temp_err;
+
+            if (base >= 16)
+                continue;
+
+            temp_err = abs(value - (base << j));
+            if (temp_err < error) {
+                error = temp_err;
+                ret = (j << 4) | base;
+                if (temp_err == 0)
+                    break;
+            }
+        }
+    }
+
+    /* Compare decoded values; the encoded bytes do not order the same way. */
+    temp1 = (ret & 0xf) << (ret >> 4);
+    temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
+    if (temp1 > temp2)
+        ret = max;
+
+    return ret;
+}
+
+
+#define QP_MAX 52
+
+
+/*
+ * Map a quantiser value to the rounded multiplier 2^(qp / 6 - 2).
+ * The exponent is clamped at zero, so the result is never below 1.
+ */
+static float intel_lambda_qp(int qp)
+{
+    float exponent = (float)qp / 6 - 2;
+
+    if (exponent < 0)
+        exponent = 0;
+
+    return roundf(powf(2, exponent));
+}
+
+
+/*
+ * Fill the mode-cost and motion-vector-cost entries of the VME state
+ * message for the current AVC frame.
+ *
+ * The QP comes from the picture/slice parameters in CQP mode and from the
+ * bit-rate-control context otherwise.  Every cost is a multiple of
+ * intel_lambda_qp(qp), stored in the LUT byte format produced by
+ * intel_format_lutvalue().  Does nothing when no state message is
+ * allocated.
+ */
+void intel_vme_update_mbmv_cost(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int qp, m_cost, j, mv_count;
+    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
+    float lambda, m_costf;
+
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+    if (encoder_context->rate_control_mode == VA_RC_CQP)
+        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+    else
+        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
+
+    if (vme_state_message == NULL)
+        return;
+
+    assert(qp <= QP_MAX);
+    lambda = intel_lambda_qp(qp);
+
+    if (slice_type == SLICE_TYPE_I) {
+        vme_state_message[MODE_INTRA_16X16] = 0;
+        m_cost = lambda * 4;
+        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 16;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 3;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+    } else {
+        /* MV costs: slot 0 is free, slots 1-2 use j = 1, 2 ... */
+        m_cost = 0;
+        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
+        for (j = 1; j < 3; j++) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
+        }
+        /* ... and slots 3-7 use the powers of two j = 4 .. 64. */
+        mv_count = 3;
+        for (j = 4; j <= 64; j *= 2) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
+            mv_count++;
+        }
+
+        if (qp <= 25) {
+            /* Low QP: fixed mode costs instead of lambda-scaled ones. */
+            vme_state_message[MODE_INTRA_16X16] = 0x4a;
+            vme_state_message[MODE_INTRA_8X8] = 0x4a;
+            vme_state_message[MODE_INTRA_4X4] = 0x4a;
+            vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
+            vme_state_message[MODE_INTER_16X16] = 0x4a;
+            vme_state_message[MODE_INTER_16X8] = 0x4a;
+            vme_state_message[MODE_INTER_8X8] = 0x4a;
+            vme_state_message[MODE_INTER_8X4] = 0x4a;
+            vme_state_message[MODE_INTER_4X4] = 0x4a;
+            vme_state_message[MODE_INTER_BWD] = 0x2a;
+            return;
+        }
+
+        /*
+         * Fix: this used to assign m_costf and then pass the stale m_cost
+         * left over from the MV-cost loop above.
+         */
+        m_cost = lambda * 10;
+        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 14;
+        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 24;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+        m_costf = lambda * 3.5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+
+        if (slice_type == SLICE_TYPE_P) {
+            m_costf = lambda * 2.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 4;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 1.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 3;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+            /* BWD is not used in P-frame */
+            vme_state_message[MODE_INTER_BWD] = 0;
+        } else {
+            m_costf = lambda * 2.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 5.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 3.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 5.0;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 6.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 1.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
+        }
+    }
+}
+
+
+#define MB_SCOREBOARD_A (1 << 0)
+#define MB_SCOREBOARD_B (1 << 1)
+#define MB_SCOREBOARD_C (1 << 2)
+/*
+ * Enable the stalling scoreboard in the VFE descriptors of vme_context and
+ * declare the three macroblock dependencies used by VME prediction.
+ */
+void
+gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
+{
+    const unsigned int neighbour_mask = MB_SCOREBOARD_A |
+                                        MB_SCOREBOARD_B |
+                                        MB_SCOREBOARD_C;
+
+    /*
+     * The current macroblock is predicted from its A/B/C neighbours, so it
+     * must wait for the left (-1, 0), upper (0, -1) and upper-right (1, -1)
+     * macroblocks.
+     */
+    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
+    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
+    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
+    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
+    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
+    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
+
+    vme_context->gpe_context.vfe_desc5.scoreboard0.mask = neighbour_mask;
+    vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
+    vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
+
+    vme_context->gpe_context.vfe_desc7.dword = 0;
+}
+
+/*
+ * Check whether the macroblock at (x_index, y_index) lies inside the
+ * mb_width x mb_height picture and inside the slice made of num_mb
+ * macroblocks starting at raster index first_mb.
+ *
+ * Returns 0 when the macroblock is in bounds and -1 when it is not, so
+ * callers test it with '!'.
+ */
+static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
+{
+    int mb_index;
+
+    if (x_index < 0 || x_index >= mb_width)
+        return -1;
+    if (y_index < 0 || y_index >= mb_height)
+        return -1;
+
+    mb_index = y_index * mb_width + x_index;
+    /*
+     * The slice covers [first_mb, first_mb + num_mb).  The upper test was
+     * '>' before, which also accepted the first macroblock of the next
+     * slice.
+     */
+    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
+        return -1;
+
+    return 0;
+}
+
+#define USE_SCOREBOARD (1 << 21)
+
+/*
+ * Write the 8-dword MEDIA_OBJECT command for macroblock (mb_x, mb_y) at cmd
+ * and return the position following it.  slice_first_row is the row in
+ * which the current slice starts: macroblocks on that row get no
+ * upper-neighbour availability flags and no upper scoreboard dependencies.
+ */
+static unsigned int *
+gen7_vme_walker_emit_mb(unsigned int *cmd,
+                        int mb_x, int mb_y,
+                        int slice_first_row,
+                        int mb_width,
+                        int kernel,
+                        int transform_8x8_mode_flag)
+{
+    unsigned int intra_avail = 0;
+    unsigned int dependency = 0;
+
+    if (mb_x != 0) {
+        intra_avail |= INTRA_PRED_AVAIL_FLAG_AE;
+        dependency |= MB_SCOREBOARD_A;
+    }
+
+    if (mb_y != slice_first_row) {
+        intra_avail |= INTRA_PRED_AVAIL_FLAG_B;
+        dependency |= MB_SCOREBOARD_B;
+
+        if (mb_x != 0)
+            intra_avail |= INTRA_PRED_AVAIL_FLAG_D;
+
+        if (mb_x != (mb_width - 1)) {
+            intra_avail |= INTRA_PRED_AVAIL_FLAG_C;
+            dependency |= MB_SCOREBOARD_C;
+        }
+    }
+
+    cmd[0] = (CMD_MEDIA_OBJECT | (8 - 2));
+    cmd[1] = kernel;
+    cmd[2] = USE_SCOREBOARD;
+    /* Indirect data */
+    cmd[3] = 0;
+    /* the (X, Y) term of scoreboard */
+    cmd[4] = ((mb_y << 16) | mb_x);
+    cmd[5] = dependency;
+    /* inline data */
+    cmd[6] = (mb_width << 16 | mb_y << 8 | mb_x);
+    cmd[7] = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (intra_avail << 8));
+
+    return cmd + 8;
+}
+
+/*
+ * Fill the VME batch buffer with one MEDIA_OBJECT command per macroblock of
+ * every slice, followed by a zero dword and MI_BATCH_BUFFER_END.
+ *
+ * Within a slice the macroblocks are emitted along lines that step two
+ * columns left and one row down: first the lines starting on the slice's
+ * first row left of column mb_width - 2, then the lines starting in the
+ * last two columns of each successive row.
+ */
+void
+gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
+                                     struct encode_state *encode_state,
+                                     int mb_width, int mb_height,
+                                     int kernel,
+                                     int transform_8x8_mode_flag,
+                                     struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    unsigned int *command_ptr;
+    int s;
+
+    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
+    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+
+    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+        const int first_mb = pSliceParameter->macroblock_address;
+        const int num_mb = pSliceParameter->num_macroblocks;
+        const int slice_row = first_mb / mb_width;
+        int start_x, start_y, walk_x, walk_y;
+        int last_cols_x;
+
+        /* Lines that start on the first row of the slice. */
+        start_x = first_mb % mb_width;
+        start_y = slice_row;
+        while (start_x < (mb_width - 2) &&
+               !loop_in_bounds(start_x, start_y, first_mb, num_mb, mb_width, mb_height)) {
+            walk_x = start_x;
+            walk_y = start_y;
+            while (!loop_in_bounds(walk_x, walk_y, first_mb, num_mb, mb_width, mb_height)) {
+                command_ptr = gen7_vme_walker_emit_mb(command_ptr, walk_x, walk_y,
+                                                      slice_row, mb_width, kernel,
+                                                      transform_8x8_mode_flag);
+                walk_x -= 2;
+                walk_y += 1;
+            }
+            start_x += 1;
+        }
+
+        /* Lines that start in the last two columns, row after row. */
+        last_cols_x = mb_width - 2;
+        if (last_cols_x < 0)
+            last_cols_x = 0;
+
+        start_x = last_cols_x;
+        start_y = slice_row;
+        while (!loop_in_bounds(start_x, start_y, first_mb, num_mb, mb_width, mb_height)) {
+            walk_x = start_x;
+            walk_y = start_y;
+            while (!loop_in_bounds(walk_x, walk_y, first_mb, num_mb, mb_width, mb_height)) {
+                command_ptr = gen7_vme_walker_emit_mb(command_ptr, walk_x, walk_y,
+                                                      slice_row, mb_width, kernel,
+                                                      transform_8x8_mode_flag);
+                walk_x -= 2;
+                walk_y += 1;
+            }
+
+            start_x++;
+            if (start_x >= mb_width) {
+                start_y += 1;
+                start_x = last_cols_x;
+            }
+        }
+    }
+
+    *command_ptr++ = 0;
+    *command_ptr++ = MI_BATCH_BUFFER_END;
+
+    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+}
+
+/*
+ * Pack one reference-index state byte from the flags of va_pic and
+ * frame_store_id:
+ *   bit 6  - long-term reference flag
+ *   bit 5  - set unless exactly one of the top/bottom field flags is set
+ *   bit 1+ - frame_store_id, shifted left by one (not masked)
+ *   bit 0  - set when the bottom-field flag is set and the top-field flag
+ *            is not
+ */
+static uint8_t
+intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
+{
+    const unsigned int long_term =
+        (va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE) ? 1 : 0;
+    const unsigned int top =
+        (va_pic->flags & VA_PICTURE_H264_TOP_FIELD) ? 1 : 0;
+    const unsigned int bottom =
+        (va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD) ? 1 : 0;
+    unsigned int state = 0;
+
+    state |= long_term << 6;
+    state |= ((top ^ bottom) ^ 1) << 5;
+    state |= frame_store_id << 1;
+    state |= (top ^ 1) & bottom;
+
+    return state;
+}
+
+/*
+ * Return the index of obj_surface in encode_state->reference_objects[0..15],
+ * or -1 when obj_surface is NULL or not present.
+ */
+static int
+intel_mfc_avc_find_dpb_index(struct encode_state *encode_state,
+                             struct object_surface *obj_surface)
+{
+    int i;
+
+    if (obj_surface == NULL)
+        return -1;
+
+    for (i = 0; i < 16; i++) {
+        if (encode_state->reference_objects[i] == obj_surface)
+            return i;
+    }
+
+    return -1;
+}
+
+/*
+ * Emit one MFX_AVC_REF_IDX_STATE command for reference list `list`
+ * (0 = L0, 1 = L1).  Only the first entry dword carries data; the other
+ * seven are filled with 0x80808080.
+ */
+static void
+intel_mfc_avc_emit_ref_idx_list(struct intel_batchbuffer *batch,
+                                unsigned int list,
+                                unsigned int first_entry)
+{
+    int i;
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
+    OUT_BCS_BATCH(batch, list);
+    OUT_BCS_BATCH(batch, first_entry); //Only 1 reference
+    for (i = 0; i < 7; i++) {
+        OUT_BCS_BATCH(batch, 0x80808080);
+    }
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Program the L0 and L1 reference index tables for the current slice.
+ *
+ * Both tables start as 0x80808080.  For P and B slices the byte selected
+ * by the VME L0 reference index is replaced with the state of the
+ * reference actually used; B slices do the same for L1.  A reference that
+ * is not found in the DPB leaves its table untouched and warns once.
+ */
+void
+intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
+                            struct encode_state *encode_state,
+                            struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+    unsigned int fref_entry = 0x80808080;
+    unsigned int bref_entry = 0x80808080;
+    int frame_index;
+
+    if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
+        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
+
+        if (ref_idx_l0 > 3) {
+            WARN_ONCE("ref_idx_l0 is out of range\n");
+            ref_idx_l0 = 0;
+        }
+
+        frame_index = intel_mfc_avc_find_dpb_index(encode_state,
+                                                   vme_context->used_reference_objects[0]);
+        if (frame_index == -1) {
+            WARN_ONCE("RefPicList0 is not found in DPB!\n");
+        } else {
+            int ref_idx_l0_shift = ref_idx_l0 * 8;
+
+            /* Unsigned mask: 0xFF << 24 would overflow a signed int. */
+            fref_entry &= ~(0xFFu << ref_idx_l0_shift);
+            fref_entry |= ((unsigned int)intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
+        }
+    }
+
+    if (slice_type == SLICE_TYPE_B) {
+        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
+
+        if (ref_idx_l1 > 3) {
+            WARN_ONCE("ref_idx_l1 is out of range\n");
+            ref_idx_l1 = 0;
+        }
+
+        frame_index = intel_mfc_avc_find_dpb_index(encode_state,
+                                                   vme_context->used_reference_objects[1]);
+        if (frame_index == -1) {
+            WARN_ONCE("RefPicList1 is not found in DPB!\n");
+        } else {
+            int ref_idx_l1_shift = ref_idx_l1 * 8;
+
+            bref_entry &= ~(0xFFu << ref_idx_l1_shift);
+            bref_entry |= ((unsigned int)intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
+        }
+    }
+
+    intel_mfc_avc_emit_ref_idx_list(batch, 0, fref_entry); //Select L0
+    intel_mfc_avc_emit_ref_idx_list(batch, 1, bref_entry); //Select L1
+}
+
+
+/*
+ * Fill the VME state message for an MPEG-2 frame: the motion vector range
+ * for the configured level, the picture size in macroblocks and, for
+ * non-intra pictures, the mode and motion-vector costs derived from the
+ * quantiser scale code of the first slice.
+ *
+ * Does nothing when no state message is allocated, like
+ * intel_vme_update_mbmv_cost().
+ *
+ * NOTE(review): the MODE_* slots are written here through a uint32_t view
+ * of the state message while intel_vme_update_mbmv_cost() writes them
+ * through a uint8_t view -- confirm which layout the MPEG-2 kernel expects.
+ */
+void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
+                                 struct encode_state *encode_state,
+                                 struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
+    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
+    VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
+    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
+    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
+    uint32_t mv_x, mv_y;
+
+    if (vme_state_message == NULL)
+        return;
+
+    if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
+        mv_x = 512;
+        mv_y = 64;
+    } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
+        mv_x = 1024;
+        mv_y = 128;
+    } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
+        mv_x = 2048;
+        mv_y = 128;
+    } else {
+        /* Unknown level: fall back to the low-level range. */
+        WARN_ONCE("Incorrect Mpeg2 level setting!\n");
+        mv_x = 512;
+        mv_y = 64;
+    }
+
+    if (pic_param->picture_type != VAEncPictureTypeIntra) {
+        int qp, m_cost, j, mv_count;
+        float lambda, m_costf;
+
+        qp = slice_param->quantiser_scale_code;
+        lambda = intel_lambda_qp(qp);
+
+        /* No Intra prediction. So it is zero */
+        vme_state_message[MODE_INTRA_8X8] = 0;
+        vme_state_message[MODE_INTRA_4X4] = 0;
+
+        /* MV costs: slot 0 is free, slots 1-2 use j = 1, 2 ... */
+        vme_state_message[MODE_INTER_MV0] = 0;
+        for (j = 1; j < 3; j++) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
+        }
+        /* ... and slots 3-7 use the powers of two j = 4 .. 64. */
+        mv_count = 3;
+        for (j = 4; j <= 64; j *= 2) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + mv_count] =
+                intel_format_lutvalue(m_cost, 0x6f);
+            mv_count++;
+        }
+
+        m_cost = lambda;
+        /* It can only perform the 16x16 search. So mode cost can be ignored for
+         * the other mode. for example: 16x8/8x8
+         */
+        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+        vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+
+        vme_state_message[MODE_INTER_16X8] = 0;
+        vme_state_message[MODE_INTER_8X8] = 0;
+        vme_state_message[MODE_INTER_8X4] = 0;
+        vme_state_message[MODE_INTER_4X4] = 0;
+        vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
+    }
+
+    vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
+
+    vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
+                                                width_in_mbs;
+}
+
+#define MPEG2_SCOREBOARD (1 << 21)
+
+/*
+ * Write the 8-dword MEDIA_OBJECT command for macroblock (mb_x, mb_y) at cmd
+ * and return the position following it.  Macroblocks on row 0 get no
+ * upper-neighbour availability flags and no upper scoreboard dependencies.
+ */
+static unsigned int *
+gen7_vme_mpeg2_walker_emit_mb(unsigned int *cmd,
+                              int mb_x, int mb_y,
+                              int mb_width,
+                              int kernel)
+{
+    unsigned int intra_avail = 0;
+    unsigned int dependency = 0;
+
+    if (mb_x != 0) {
+        intra_avail |= INTRA_PRED_AVAIL_FLAG_AE;
+        dependency |= MB_SCOREBOARD_A;
+    }
+
+    if (mb_y != 0) {
+        intra_avail |= INTRA_PRED_AVAIL_FLAG_B;
+        dependency |= MB_SCOREBOARD_B;
+
+        if (mb_x != 0)
+            intra_avail |= INTRA_PRED_AVAIL_FLAG_D;
+
+        if (mb_x != (mb_width - 1)) {
+            intra_avail |= INTRA_PRED_AVAIL_FLAG_C;
+            dependency |= MB_SCOREBOARD_C;
+        }
+    }
+
+    cmd[0] = (CMD_MEDIA_OBJECT | (8 - 2));
+    cmd[1] = kernel;
+    cmd[2] = MPEG2_SCOREBOARD;
+    /* Indirect data */
+    cmd[3] = 0;
+    /* the (X, Y) term of scoreboard */
+    cmd[4] = ((mb_y << 16) | mb_x);
+    cmd[5] = dependency;
+    /* inline data */
+    cmd[6] = (mb_width << 16 | mb_y << 8 | mb_x);
+    cmd[7] = ((1 << 18) | (1 << 16) | (intra_avail << 8));
+
+    return cmd + 8;
+}
+
+/*
+ * Fill the VME batch buffer with one MEDIA_OBJECT command per macroblock of
+ * the whole mb_width x mb_height picture, followed by a zero dword and
+ * MI_BATCH_BUFFER_END.
+ *
+ * The macroblocks are emitted along lines that step two columns left and
+ * one row down: first the lines starting on row 0 left of column
+ * mb_width - 2, then the lines starting in the last two columns of each
+ * successive row.
+ */
+void
+gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
+                                           struct encode_state *encode_state,
+                                           int mb_width, int mb_height,
+                                           int kernel,
+                                           struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    /* The picture is walked as one range covering every macroblock. */
+    const int first_mb = 0;
+    const int num_mb = mb_width * mb_height;
+    unsigned int *command_ptr;
+    int start_x, start_y, walk_x, walk_y;
+    int last_cols_x;
+
+    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
+    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+
+    /* Lines that start on the first row. */
+    start_x = 0;
+    start_y = 0;
+    while (start_x < (mb_width - 2) &&
+           !loop_in_bounds(start_x, start_y, first_mb, num_mb, mb_width, mb_height)) {
+        walk_x = start_x;
+        walk_y = start_y;
+        while (!loop_in_bounds(walk_x, walk_y, first_mb, num_mb, mb_width, mb_height)) {
+            command_ptr = gen7_vme_mpeg2_walker_emit_mb(command_ptr, walk_x, walk_y,
+                                                        mb_width, kernel);
+            walk_x -= 2;
+            walk_y += 1;
+        }
+        start_x += 1;
+    }
+
+    /* Lines that start in the last two columns, row after row. */
+    last_cols_x = mb_width - 2;
+    if (last_cols_x < 0)
+        last_cols_x = 0;
+
+    start_x = last_cols_x;
+    start_y = 0;
+    while (!loop_in_bounds(start_x, start_y, first_mb, num_mb, mb_width, mb_height)) {
+        walk_x = start_x;
+        walk_y = start_y;
+        while (!loop_in_bounds(walk_x, walk_y, first_mb, num_mb, mb_width, mb_height)) {
+            command_ptr = gen7_vme_mpeg2_walker_emit_mb(command_ptr, walk_x, walk_y,
+                                                        mb_width, kernel);
+            walk_x -= 2;
+            walk_y += 1;
+        }
+
+        start_x++;
+        if (start_x >= mb_width) {
+            start_y += 1;
+            start_x = last_cols_x;
+        }
+    }
+
+    *command_ptr++ = 0;
+    *command_ptr++ = MI_BATCH_BUFFER_END;
+
+    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+}
+
+static int
+avc_temporal_find_surface(VAPictureH264 *curr_pic,
+ VAPictureH264 *ref_list,
+ int num_pictures,
+ int dir)
+{
+ int i, found = -1, min = 0x7FFFFFFF;
+
+ for (i = 0; i < num_pictures; i++) {
+ int tmp;
+
+ if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
+ (ref_list[i].picture_id == VA_INVALID_SURFACE))
+ break;
+
+ tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
+
+ if (dir)
+ tmp = -tmp;
+
+ if (tmp > 0 && tmp < min) {
+ min = tmp;
+ found = i;
+ }