X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Fgen75_vme.c;h=585375766a73516ccdb6e95fa925a005a96616cd;hb=0e29103373dbadb30bab189722f19c3f9447a306;hp=4bfe0854598fea90d7fc431eb819a75ca73fc76e;hpb=788f649bc66b773e3af1e23870431c19c2c1136d;p=profile%2Fivi%2Fvaapi-intel-driver.git diff --git a/src/gen75_vme.c b/src/gen75_vme.c index 4bfe085..5853757 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -55,6 +55,7 @@ #define VME_INTRA_SHADER 0 #define VME_INTER_SHADER 1 +#define VME_BINTER_SHADER 3 #define VME_BATCHBUFFER 2 #define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */ @@ -63,10 +64,6 @@ #define VME_MSG_LENGTH 32 -#define MB_SCOREBOARD_A (1 << 0) -#define MB_SCOREBOARD_B (1 << 1) -#define MB_SCOREBOARD_C (1 << 2) - static const uint32_t gen75_vme_intra_frame[][4] = { #include "shaders/vme/intra_frame_haswell.g75b" }; @@ -75,6 +72,10 @@ static const uint32_t gen75_vme_inter_frame[][4] = { #include "shaders/vme/inter_frame_haswell.g75b" }; +static const uint32_t gen75_vme_inter_bframe[][4] = { +#include "shaders/vme/inter_bframe_haswell.g75b" +}; + static const uint32_t gen75_vme_batchbuffer[][4] = { #include "shaders/vme/batchbuffer.g75b" }; @@ -101,6 +102,13 @@ static struct i965_kernel gen75_vme_kernels[] = { sizeof(gen75_vme_batchbuffer), NULL }, + { + "VME inter BFrame", + VME_BINTER_SHADER, + gen75_vme_inter_bframe, + sizeof(gen75_vme_inter_bframe), + NULL + } }; static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = { @@ -305,7 +313,7 @@ static VAStatus gen75_vme_interface_setup(VADriverContextP ctx, assert(bo->virtual); desc = bo->virtual; - for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) { + for (i = 0; i < vme_context->vme_kernel_sum; i++) { struct i965_kernel *kernel; kernel = &vme_context->gpe_context.kernels[i]; assert(sizeof(*desc) == 32); @@ -444,7 +452,7 @@ static void gen75_vme_state_setup_fixup(VADriverContextP ctx, if (encoder_context->rate_control_mode == VA_RC_CQP) vme_state_message[0] = 
intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta]; else - vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY]; + vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY]; } static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx, @@ -486,11 +494,6 @@ static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx, return VA_STATUS_SUCCESS; } -#define INTRA_PRED_AVAIL_FLAG_AE 0x60 -#define INTRA_PRED_AVAIL_FLAG_B 0x10 -#define INTRA_PRED_AVAIL_FLAG_C 0x8 -#define INTRA_PRED_AVAIL_FLAG_D 0x4 -#define INTRA_PRED_AVAIL_FLAG_BCD_MASK 0x1C static void gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, @@ -562,82 +565,9 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, dri_bo_unmap(vme_context->vme_batchbuffer.bo); } - -static void -gen75_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - int transform_8x8_mode_flag, - struct intel_encoder_context *encoder_context) -{ - struct gen6_vme_context *vme_context = encoder_context->vme_context; - int mb_x = 0, mb_y = 0; - int mb_row; - int i, s; - unsigned int *command_ptr; - int temp; - - -#define USE_SCOREBOARD (1 << 21) - - dri_bo_map(vme_context->vme_batchbuffer.bo, 1); - command_ptr = vme_context->vme_batchbuffer.bo->virtual; - - for (s = 0; s < encode_state->num_slice_params_ext; s++) { - VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; - int slice_mb_begin = pSliceParameter->macroblock_address; - int slice_mb_number = pSliceParameter->num_macroblocks; - unsigned int mb_intra_ub, score_dep; - int slice_mb_x = pSliceParameter->macroblock_address % mb_width; - mb_row = slice_mb_begin / mb_width; - for (i = 0; i < slice_mb_number; ) { - int mb_count = i + slice_mb_begin; - mb_x = mb_count % mb_width; - 
mb_y = mb_count / mb_width; - mb_intra_ub = 0; - score_dep = 0; - if (mb_x != 0) { - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; - score_dep |= MB_SCOREBOARD_A; - } - if (mb_y != mb_row) { - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; - score_dep |= MB_SCOREBOARD_B; - if (mb_x != 0) - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; - if (mb_x != (mb_width -1)) { - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; - score_dep |= MB_SCOREBOARD_C; - } - } - - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); - *command_ptr++ = kernel; - *command_ptr++ = USE_SCOREBOARD; - *command_ptr++ = 0; - /* the (X, Y) term of scoreboard */ - *command_ptr++ = ((mb_y << 16) | mb_x); - *command_ptr++ = score_dep; - /*inline data */ - *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); - *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); - - i += 1; - } - } - - *command_ptr++ = 0; - *command_ptr++ = MI_BATCH_BUFFER_END; - - dri_bo_unmap(vme_context->vme_batchbuffer.bo); -} - static void gen75_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { - struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_vme_context *vme_context = encoder_context->vme_context; - dri_bo *bo; i965_gpe_context_init(ctx, &vme_context->gpe_context); @@ -662,9 +592,9 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx, VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; int width_in_mbs = pSequenceParameter->picture_width_in_mbs; int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int kernel_shader; bool allow_hwscore = true; int s; @@ -675,19 +605,29 
@@ static void gen75_vme_pipeline_programing(VADriverContextP ctx, break; } } - + if ((pSliceParameter->slice_type == SLICE_TYPE_I) || + (pSliceParameter->slice_type == SLICE_TYPE_SI)) { /* I and SI both take the intra kernel, mirroring the P/SP pairing below */ + kernel_shader = VME_INTRA_SHADER; + } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || + (pSliceParameter->slice_type == SLICE_TYPE_SP)) { + kernel_shader = VME_INTER_SHADER; + } else { + kernel_shader = VME_BINTER_SHADER; + if (!allow_hwscore) + kernel_shader = VME_INTER_SHADER; + } if (allow_hwscore) - gen75_vme_walker_fill_vme_batchbuffer(ctx, + gen7_vme_walker_fill_vme_batchbuffer(ctx, encode_state, width_in_mbs, height_in_mbs, - is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, + kernel_shader, pPicParameter->pic_fields.bits.transform_8x8_mode_flag, encoder_context); else gen75_vme_fill_vme_batchbuffer(ctx, encode_state, width_in_mbs, height_in_mbs, - is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, + kernel_shader, pPicParameter->pic_fields.bits.transform_8x8_mode_flag, encoder_context); @@ -882,11 +822,6 @@ gen75_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, int i, s, j; unsigned int *command_ptr; -#define INTRA_PRED_AVAIL_FLAG_AE 0x60 -#define INTRA_PRED_AVAIL_FLAG_B 0x10 -#define INTRA_PRED_AVAIL_FLAG_C 0x8 -#define INTRA_PRED_AVAIL_FLAG_D 0x4 -#define INTRA_PRED_AVAIL_FLAG_BCD_MASK 0x1C dri_bo_map(vme_context->vme_batchbuffer.bo, 1); command_ptr = vme_context->vme_batchbuffer.bo->virtual; @@ -1054,6 +989,7 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * { struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context)); struct i965_kernel *vme_kernel_list = NULL; + int i965_kernel_num = 0; /* defined value even when no profile case below assigns it */ switch (encoder_context->profile) { case VAProfileH264Baseline: @@ -1061,13 +997,14 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * case VAProfileH264High: vme_kernel_list = gen75_vme_kernels; encoder_context->vme_pipeline = gen75_vme_pipeline; - + i965_kernel_num =
sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel); break; case VAProfileMPEG2Simple: case VAProfileMPEG2Main: vme_kernel_list = gen75_vme_mpeg2_kernels; encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline; + i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel); break; @@ -1077,7 +1014,7 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * break; } - + vme_context->vme_kernel_sum = i965_kernel_num; vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6; @@ -1091,29 +1028,12 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1; - vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1; - vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING; - vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A | - MB_SCOREBOARD_B | - MB_SCOREBOARD_C); - - /* In VME prediction the current mb depends on the neighbour - * A/B/C macroblock. So the left/up/up-right dependency should - * be considered. 
- */ - vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1; - vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0; - vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0; - vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1; - vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1; - vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1; - - vme_context->gpe_context.vfe_desc7.dword = 0; + gen7_vme_scoreboard_init(ctx, vme_context); i965_gpe_load_kernels(ctx, &vme_context->gpe_context, vme_kernel_list, - GEN6_VME_KERNEL_NUMBER); + i965_kernel_num); vme_context->vme_surface2_setup = gen7_gpe_surface2_setup; vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup; vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;