#define VME_INTRA_SHADER 0
#define VME_INTER_SHADER 1
+#define VME_BINTER_SHADER 3
#define VME_BATCHBUFFER 2
#define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */
#define VME_MSG_LENGTH 32
-#define MB_SCOREBOARD_A (1 << 0)
-#define MB_SCOREBOARD_B (1 << 1)
-#define MB_SCOREBOARD_C (1 << 2)
-
static const uint32_t gen75_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_haswell.g75b"
};
#include "shaders/vme/inter_frame_haswell.g75b"
};
+static const uint32_t gen75_vme_inter_bframe[][4] = {
+#include "shaders/vme/inter_bframe_haswell.g75b"
+};
+
static const uint32_t gen75_vme_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g75b"
};
sizeof(gen75_vme_batchbuffer),
NULL
},
+ {
+ "VME inter BFrame",
+ VME_BINTER_SHADER,
+ gen75_vme_inter_bframe,
+ sizeof(gen75_vme_inter_bframe),
+ NULL
+ }
};
static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = {
assert(bo->virtual);
desc = bo->virtual;
- for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
+ for (i = 0; i < vme_context->vme_kernel_sum; i++) {
struct i965_kernel *kernel;
kernel = &vme_context->gpe_context.kernels[i];
assert(sizeof(*desc) == 32);
if (encoder_context->rate_control_mode == VA_RC_CQP)
vme_state_message[0] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
else
- vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY];
+ vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY];
}
static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
-#define INTRA_PRED_AVAIL_FLAG_AE 0x60
-#define INTRA_PRED_AVAIL_FLAG_B 0x10
-#define INTRA_PRED_AVAIL_FLAG_C 0x8
-#define INTRA_PRED_AVAIL_FLAG_D 0x4
-#define INTRA_PRED_AVAIL_FLAG_BCD_MASK 0x1C
static void
gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
-
-static void
-gen75_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
- struct encode_state *encode_state,
- int mb_width, int mb_height,
- int kernel,
- int transform_8x8_mode_flag,
- struct intel_encoder_context *encoder_context)
-{
- struct gen6_vme_context *vme_context = encoder_context->vme_context;
- int mb_x = 0, mb_y = 0;
- int mb_row;
- int i, s;
- unsigned int *command_ptr;
- int temp;
-
-
-#define USE_SCOREBOARD (1 << 21)
-
- dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
- command_ptr = vme_context->vme_batchbuffer.bo->virtual;
-
- for (s = 0; s < encode_state->num_slice_params_ext; s++) {
- VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
- int slice_mb_begin = pSliceParameter->macroblock_address;
- int slice_mb_number = pSliceParameter->num_macroblocks;
- unsigned int mb_intra_ub, score_dep;
- int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
- mb_row = slice_mb_begin / mb_width;
- for (i = 0; i < slice_mb_number; ) {
- int mb_count = i + slice_mb_begin;
- mb_x = mb_count % mb_width;
- mb_y = mb_count / mb_width;
- mb_intra_ub = 0;
- score_dep = 0;
- if (mb_x != 0) {
- mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
- score_dep |= MB_SCOREBOARD_A;
- }
- if (mb_y != mb_row) {
- mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
- score_dep |= MB_SCOREBOARD_B;
- if (mb_x != 0)
- mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
- if (mb_x != (mb_width -1)) {
- mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
- score_dep |= MB_SCOREBOARD_C;
- }
- }
-
- *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
- *command_ptr++ = kernel;
- *command_ptr++ = USE_SCOREBOARD;
- *command_ptr++ = 0;
- /* the (X, Y) term of scoreboard */
- *command_ptr++ = ((mb_y << 16) | mb_x);
- *command_ptr++ = score_dep;
- /*inline data */
- *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
- *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
-
- i += 1;
- }
- }
-
- *command_ptr++ = 0;
- *command_ptr++ = MI_BATCH_BUFFER_END;
-
- dri_bo_unmap(vme_context->vme_batchbuffer.bo);
-}
-
static void gen75_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_vme_context *vme_context = encoder_context->vme_context;
- dri_bo *bo;
i965_gpe_context_init(ctx, &vme_context->gpe_context);
VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
- int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+ int kernel_shader;
bool allow_hwscore = true;
int s;
break;
}
}
-
+ /* Pick the VME kernel from the type of the first slice
+  * (pSliceParameter points at slice_params_ext[0]):
+  *   I / SI      -> VME_INTRA_SHADER
+  *   P / SP      -> VME_INTER_SHADER
+  *   otherwise   -> VME_BINTER_SHADER
+  * NOTE(review): SLICE_TYPE_SI is assumed to be defined next to
+  * SLICE_TYPE_SP in the driver headers -- confirm.
+  */
+ if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
+ (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
+ kernel_shader = VME_INTRA_SHADER;
+ } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
+ (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
+ kernel_shader = VME_INTER_SHADER;
+ } else {
+ kernel_shader = VME_BINTER_SHADER;
+ /* When the hardware scoreboard is not allowed, fall back to
+  * VME_INTER_SHADER instead of the B-frame kernel. */
+ if (!allow_hwscore)
+ kernel_shader = VME_INTER_SHADER;
+ }
if (allow_hwscore)
- gen75_vme_walker_fill_vme_batchbuffer(ctx,
+ gen7_vme_walker_fill_vme_batchbuffer(ctx,
encode_state,
width_in_mbs, height_in_mbs,
- is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
+ kernel_shader,
pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
encoder_context);
else
gen75_vme_fill_vme_batchbuffer(ctx,
encode_state,
width_in_mbs, height_in_mbs,
- is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
+ kernel_shader,
pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
encoder_context);
int i, s, j;
unsigned int *command_ptr;
-#define INTRA_PRED_AVAIL_FLAG_AE 0x60
-#define INTRA_PRED_AVAIL_FLAG_B 0x10
-#define INTRA_PRED_AVAIL_FLAG_C 0x8
-#define INTRA_PRED_AVAIL_FLAG_D 0x4
-#define INTRA_PRED_AVAIL_FLAG_BCD_MASK 0x1C
dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
command_ptr = vme_context->vme_batchbuffer.bo->virtual;
{
struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
struct i965_kernel *vme_kernel_list = NULL;
+ int i965_kernel_num = 0; /* stays 0 when no profile case below assigns a kernel count */
switch (encoder_context->profile) {
case VAProfileH264Baseline:
case VAProfileH264High:
vme_kernel_list = gen75_vme_kernels;
encoder_context->vme_pipeline = gen75_vme_pipeline;
-
+ i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel);
break;
case VAProfileMPEG2Simple:
case VAProfileMPEG2Main:
vme_kernel_list = gen75_vme_mpeg2_kernels;
encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline;
+ i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
break;
break;
}
-
+ vme_context->vme_kernel_sum = i965_kernel_num;
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
- vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
- vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
- vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
- MB_SCOREBOARD_B |
- MB_SCOREBOARD_C);
-
- /* In VME prediction the current mb depends on the neighbour
- * A/B/C macroblock. So the left/up/up-right dependency should
- * be considered.
- */
- vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
- vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
- vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
- vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
- vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
- vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
-
- vme_context->gpe_context.vfe_desc7.dword = 0;
+ gen7_vme_scoreboard_init(ctx, vme_context);
i965_gpe_load_kernels(ctx,
&vme_context->gpe_context,
vme_kernel_list,
- GEN6_VME_KERNEL_NUMBER);
+ i965_kernel_num);
vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;