From 7b1aa6b294a7e5c83d1065931119d38a33b7d46b Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 10 Jun 2011 10:48:16 +0800 Subject: [PATCH] i965_drv_video: encode on Ivybridge Signed-off-by: Xiang, Haihao --- i965_drv_video/gen6_mfc.c | 258 ++++++++++++++++++++++++++++++-- i965_drv_video/gen6_vme.c | 297 +++++++++++++++++++++++++++++++++++-- i965_drv_video/i965_defines.h | 1 + i965_drv_video/i965_drv_video.c | 2 +- i965_drv_video/i965_structs.h | 52 +++++++ i965_drv_video/intel_batchbuffer.c | 17 ++- i965_drv_video/intel_driver.h | 1 + 7 files changed, 602 insertions(+), 26 deletions(-) diff --git a/i965_drv_video/gen6_mfc.c b/i965_drv_video/gen6_mfc.c index 45e10fe..3514402 100644 --- a/i965_drv_video/gen6_mfc.c +++ b/i965_drv_video/gen6_mfc.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010 Intel Corporation + * Copyright © 2010-2011 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -43,7 +43,7 @@ gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen { struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; - BEGIN_BCS_BATCH(batch,4); + BEGIN_BCS_BATCH(batch, 4); OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2)); OUT_BCS_BATCH(batch, @@ -71,6 +71,42 @@ gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen } static void +gen7_mfc_pipe_mode_select(VADriverContextP ctx, + int standard_select, + struct gen6_encoder_context *gen6_encoder_context) +{ + struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; + + assert(standard_select == MFX_FORMAT_MPEG2 || + standard_select == MFX_FORMAT_AVC); + + BEGIN_BCS_BATCH(batch, 5); + OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); + OUT_BCS_BATCH(batch, + (MFX_LONG_MODE << 17) | /* Must be long format for encoder */ + (MFD_MODE_VLD << 15) | /* VLD mode */ + (0 << 10) | /* disable Stream-Out */ + (1 << 9) | /* Post Deblocking Output 
*/ + (0 << 8) | /* Pre Deblocking Output */ + (0 << 5) | /* not in stitch mode */ + (1 << 4) | /* encoding mode */ + (standard_select << 0)); /* standard select: avc or mpeg2 */ + OUT_BCS_BATCH(batch, + (0 << 7) | /* expand NOA bus flag */ + (0 << 6) | /* disable slice-level clock gating */ + (0 << 5) | /* disable clock gating for NOA */ + (0 << 4) | /* terminate if AVC motion and POC table error occurs */ + (0 << 3) | /* terminate if AVC mbdata error occurs */ + (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ + (0 << 1) | + (0 << 0)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context) { struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; @@ -99,6 +135,34 @@ gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e } static void +gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context) +{ + struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context; + + BEGIN_BCS_BATCH(batch, 6); + + OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + ((mfc_context->surface_state.height - 1) << 18) | + ((mfc_context->surface_state.width - 1) << 4)); + OUT_BCS_BATCH(batch, + (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ + (0 << 22) | /* surface object control state, FIXME??? 
*/ + ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */ + (0 << 2) | /* must be 0 for interleave U/V */ + (1 << 1) | /* must be tiled */ + (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* must be 0 for interleave U/V */ + (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */ + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context) { struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; @@ -167,6 +231,31 @@ gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_conte } static void +gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context) +{ + struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; + struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context; + + BEGIN_BCS_BATCH(batch, 11); + + OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* MFX Indirect MV Object Base Address */ + OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /*MFC Indirect PAK-BSE Object Base Address for Encoder*/ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ + + ADVANCE_BCS_BATCH(batch); +} + +static void gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context) { struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; @@ -238,6 +327,63 @@ gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e ADVANCE_BCS_BATCH(batch); } +static void +gen7_mfc_avc_img_state(VADriverContextP ctx, struct 
gen6_encoder_context *gen6_encoder_context) +{ + struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context; + + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + + BEGIN_BCS_BATCH(batch, 16); + OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); + OUT_BCS_BATCH(batch, + ((width_in_mbs * height_in_mbs) & 0xFFFF)); + OUT_BCS_BATCH(batch, + ((height_in_mbs - 1) << 16) | + ((width_in_mbs - 1) << 0)); + OUT_BCS_BATCH(batch, + (0 << 24) | /* Second Chroma QP Offset */ + (0 << 16) | /* Chroma QP Offset */ + (0 << 14) | /* Max-bit conformance Intra flag */ + (0 << 13) | /* Max Macroblock size conformance Inter flag */ + (0 << 12) | /* FIXME: Weighted_Pred_Flag */ + (0 << 10) | /* FIXME: Weighted_BiPred_Idc */ + (0 << 8) | /* FIXME: Image Structure */ + (0 << 0) ); /* Current Decoded Image Frame Store ID, reserved in Encode mode */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* Minimum Frame size */ + (0 << 15) | /* Disable reading of Macroblock Status Buffer */ + (0 << 14) | /* Load BitStream Pointer only once, 1 slice 1 frame */ + (0 << 13) | /* CABAC 0 word insertion test enable */ + (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */ + (1 << 10) | /* Chroma Format IDC, 4:2:0 */ + (0 << 9) | /* FIXME: MbMvFormatFlag */ + (1 << 7) | /* 0:CAVLC encoding mode,1:CABAC */ + (0 << 6) | /* Only valid for VLD decoding mode */ + (0 << 5) | /* Constrained Intra Prediction Flag, from PPS */ + (0 << 4) | /* Direct 8x8 inference flag */ + (0 << 3) | /* Only 8x8 IDCT Transform Mode Flag */ + (1 << 2) | /* Frame MB only flag */ + (0 << 1) | /* MBAFF mode is in active */ + (0 << 0)); /* Field picture flag */ + OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ + OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ + (0xBB8 << 16) | /* InterMbMaxSz */ + (0xEE8) ); /* 
IntraMbMaxSz */ + OUT_BCS_BATCH(batch, 0); /* Reserved */ + OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ + OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ + OUT_BCS_BATCH(batch, 0x8C000000); + OUT_BCS_BATCH(batch, 0x00010000); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context) { @@ -355,6 +501,82 @@ static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_con ADVANCE_BCS_BATCH(batch); } +static void +gen7_mfc_qm_state(VADriverContextP ctx, + int qm_type, + unsigned int *qm, + int qm_length, + struct gen6_encoder_context *gen6_encoder_context) +{ + struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; + unsigned int qm_buffer[16] = { 0 }; /* zero the tail: all 16 dwords are emitted even when qm_length < 16 */ + + assert(qm_length <= 16); + assert(sizeof(*qm) == 4); + memcpy(qm_buffer, qm, qm_length * 4); + + BEGIN_BCS_BATCH(batch, 18); + OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2)); + OUT_BCS_BATCH(batch, qm_type << 0); + intel_batchbuffer_data(batch, qm_buffer, 16 * 4); + ADVANCE_BCS_BATCH(batch); +} + +static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context) +{ + unsigned int qm[16] = { + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010 + }; + + gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context); + gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context); + gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context); + gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context); +} + +static void +gen7_mfc_fqm_state(VADriverContextP ctx, + int fqm_type, + unsigned int *fqm, + 
int fqm_length, + struct gen6_encoder_context *gen6_encoder_context) +{ + struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; + unsigned int fqm_buffer[32] = { 0 }; /* zero the tail: all 32 dwords are emitted even when fqm_length < 32 */ + + assert(fqm_length <= 32); + assert(sizeof(*fqm) == 4); + memcpy(fqm_buffer, fqm, fqm_length * 4); + + BEGIN_BCS_BATCH(batch, 34); + OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2)); + OUT_BCS_BATCH(batch, fqm_type << 0); + intel_batchbuffer_data(batch, fqm_buffer, 32 * 4); + ADVANCE_BCS_BATCH(batch); +} + +static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context) +{ + unsigned int qm[32] = { + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000 + }; + + gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context); + gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context); + gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context); + gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context); +} + static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context) { struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; @@ -373,7 +595,6 @@ static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder ADVANCE_BCS_BATCH(batch); } - static void gen6_mfc_avc_insert_object(VADriverContextP ctx, int flush_data, struct gen6_encoder_context *gen6_encoder_context) { @@ -535,6 +756,7 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, struct encode_state *encode_state, struct gen6_encoder_context *gen6_encoder_context) { + struct 
i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = gen6_encoder_context->base.batch; struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context; struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context; @@ -561,18 +783,32 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, if (emit_new_state) { intel_batchbuffer_emit_mi_flush(batch); - gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context); - gen6_mfc_surface_state(ctx, gen6_encoder_context); + + if (IS_GEN7(i965->intel.device_id)) { + gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context); + gen7_mfc_surface_state(ctx, gen6_encoder_context); + gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context); + } else { + gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context); + gen6_mfc_surface_state(ctx, gen6_encoder_context); + gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context); + } + gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context); - gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context); gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context); - gen6_mfc_avc_img_state(ctx, gen6_encoder_context); - gen6_mfc_avc_qm_state(ctx, gen6_encoder_context); - gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context); + + if (IS_GEN7(i965->intel.device_id)) { + gen7_mfc_avc_img_state(ctx, gen6_encoder_context); + gen7_mfc_avc_qm_state(ctx, gen6_encoder_context); + gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context); + } else { + gen6_mfc_avc_img_state(ctx, gen6_encoder_context); + gen6_mfc_avc_qm_state(ctx, gen6_encoder_context); + gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context); + } + gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context); - /*gen6_mfc_avc_directmode_state(ctx);*/ gen6_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context); - /*gen6_mfc_avc_insert_object(ctx, 0);*/ emit_new_state = 0; } diff --git a/i965_drv_video/gen6_vme.c b/i965_drv_video/gen6_vme.c index 61ddcc4..09a042f 100644 --- 
a/i965_drv_video/gen6_vme.c +++ b/i965_drv_video/gen6_vme.c @@ -1,5 +1,5 @@ /* - * Copyright © 2009 Intel Corporation + * Copyright © 2010-2011 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -40,11 +40,17 @@ #include "gen6_vme.h" #include "i965_encoder.h" -#define SURFACE_STATE_PADDED_SIZE_0 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1 ALIGN(sizeof(struct i965_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_0, SURFACE_STATE_PADDED_SIZE_1) -#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) -#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) +#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) + +#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index)) +#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) #define VME_INTRA_SHADER 0 #define VME_INTER_SHADER 1 @@ -55,12 +61,10 @@ static const uint32_t gen6_vme_intra_frame[][4] = { #include "shaders/vme/intra_frame.g6b" - {0,0,0,0} }; static const uint32_t gen6_vme_inter_frame[][4] = { #include "shaders/vme/inter_frame.g6b" - {0,0,0,0} }; static struct i965_kernel gen6_vme_kernels[] = { @@ -80,6 +84,31 @@ static struct i965_kernel 
gen6_vme_kernels[] = { } }; +static const uint32_t gen7_vme_intra_frame[][4] = { +#include "shaders/vme/intra_frame.g7b" +}; + +static const uint32_t gen7_vme_inter_frame[][4] = { +#include "shaders/vme/inter_frame.g7b" +}; + +static struct i965_kernel gen7_vme_kernels[] = { + { + "VME Intra Frame", + VME_INTRA_SHADER, /*index*/ + gen7_vme_intra_frame, + sizeof(gen7_vme_intra_frame), + NULL + }, + { + "VME inter Frame", + VME_INTER_SHADER, + gen7_vme_inter_frame, + sizeof(gen7_vme_inter_frame), + NULL + } +}; + static void gen6_vme_set_common_surface_tiling(struct i965_surface_state *ss, unsigned int tiling) { @@ -308,6 +337,246 @@ static VAStatus gen6_vme_surface_setup(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +/* + * Surface state for IvyBridge + */ +static void +gen7_vme_set_common_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +static void +gen7_vme_set_source_surface_tiling(struct gen7_surface_state2 *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss2.tiled_surface = 0; + ss->ss2.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +/* only used for VME source surface state */ +static void gen7_vme_source_surface_state(VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct gen6_encoder_context *gen6_encoder_context) +{ + struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context; + struct gen7_surface_state2 *ss; + dri_bo *bo; + int w, h, w_pitch, 
h_pitch; + unsigned int tiling, swizzle; + + assert(obj_surface->bo); + + w = obj_surface->orig_width; + h = obj_surface->orig_height; + w_pitch = obj_surface->width; + h_pitch = obj_surface->height; + + bo = vme_context->surface_state_binding_table.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + + ss = (struct gen7_surface_state2 *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index)); + memset(ss, 0, sizeof(*ss)); + + ss->ss0.surface_base_address = obj_surface->bo->offset; + + ss->ss1.cbcr_pixel_offset_v_direction = 2; + ss->ss1.width = w - 1; + ss->ss1.height = h - 1; + + ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8; + ss->ss2.interleave_chroma = 1; + ss->ss2.pitch = w_pitch - 1; + ss->ss2.half_pitch_for_chroma = 0; + + dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); + gen7_vme_set_source_surface_tiling(ss, tiling); + + /* UV offset for interleave mode */ + ss->ss3.x_offset_for_cb = 0; + ss->ss3.y_offset_for_cb = h_pitch; + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, 0, + 0, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0), + obj_surface->bo); + + ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(bo); +} + +static void +gen7_vme_media_source_surface_state(VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct gen6_encoder_context *gen6_encoder_context) +{ + struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context; + struct gen7_surface_state *ss; + dri_bo *bo; + int w, h, w_pitch; + unsigned int tiling, swizzle; + + /* Y plane */ + w = obj_surface->orig_width; + h = obj_surface->orig_height; + w_pitch = obj_surface->width; + + bo = vme_context->surface_state_binding_table.bo; + dri_bo_map(bo, True); + assert(bo->virtual); + + ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index)); + memset(ss, 0, sizeof(*ss)); + + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = 
I965_SURFACEFORMAT_R8_UNORM; + + ss->ss1.base_addr = obj_surface->bo->offset; + + ss->ss2.width = w / 4 - 1; + ss->ss2.height = h - 1; + + ss->ss3.pitch = w_pitch - 1; + + dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); + gen7_vme_set_common_surface_tiling(ss, tiling); + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, 0, + 0, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1), + obj_surface->bo); + + ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(bo); +} + +static VAStatus +gen7_vme_output_buffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + struct gen6_encoder_context *gen6_encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context; + struct gen7_surface_state *ss; + dri_bo *bo; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer; + VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; + int is_intra = pSliceParameter->slice_flags.bits.is_intra; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int num_entries; + + if ( is_intra ) { + vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs; + } else { + vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs * 4; + } + vme_context->vme_output.size_block = 16; /* an OWORD */ + vme_context->vme_output.pitch = ALIGN(vme_context->vme_output.size_block, 16); + bo = dri_bo_alloc(i965->intel.bufmgr, + "VME output buffer", + vme_context->vme_output.num_blocks * vme_context->vme_output.pitch, + 0x1000); + assert(bo); + vme_context->vme_output.bo = bo; + + bo = vme_context->surface_state_binding_table.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + + ss = (struct 
gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index)); + /* keep ss at SURFACE_STATE_OFFSET(index): the reloc and binding-table entry below use that offset */ + memset(ss, 0, sizeof(*ss)); + + /* pitch is always 16 bytes here, so count entries in 16-byte units */ + num_entries = vme_context->vme_output.num_blocks * vme_context->vme_output.pitch / 16; + + ss->ss0.surface_type = I965_SURFACE_BUFFER; + + ss->ss1.base_addr = vme_context->vme_output.bo->offset; + + ss->ss2.width = ((num_entries - 1) & 0x7f); + ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff); + ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f); + + ss->ss3.pitch = vme_context->vme_output.pitch - 1; + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1), + vme_context->vme_output.bo); + + ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(bo); + + return VA_STATUS_SUCCESS; +} + +static VAStatus gen7_vme_surface_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int is_intra, + struct gen6_encoder_context *gen6_encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct object_surface *obj_surface; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer; + + /*Setup surfaces state*/ + /* current picture for encoding */ + obj_surface = SURFACE(encode_state->current_render_target); + assert(obj_surface); + gen7_vme_source_surface_state(ctx, 1, obj_surface, gen6_encoder_context); + gen7_vme_media_source_surface_state(ctx, 4, obj_surface, gen6_encoder_context); + + if ( ! 
is_intra ) { + /* reference 0 */ + obj_surface = SURFACE(pPicParameter->reference_picture); + assert(obj_surface); + gen7_vme_source_surface_state(ctx, 2, obj_surface, gen6_encoder_context); + /* reference 1, FIXME: */ + // obj_surface = SURFACE(pPicParameter->reference_picture); + // assert(obj_surface); + //gen7_vme_source_surface_state(ctx, 3, obj_surface); + } + + /* VME output */ + gen7_vme_output_buffer_setup(ctx, encode_state, 0, gen6_encoder_context); + + return VA_STATUS_SUCCESS; +} + static VAStatus gen6_vme_interface_setup(VADriverContextP ctx, struct encode_state *encode_state, struct gen6_encoder_context *gen6_encoder_context) @@ -627,12 +896,17 @@ static VAStatus gen6_vme_prepare(VADriverContextP ctx, struct encode_state *encode_state, struct gen6_encoder_context *gen6_encoder_context) { + struct i965_driver_data *i965 = i965_driver_data(ctx); VAStatus vaStatus = VA_STATUS_SUCCESS; VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; int is_intra = pSliceParameter->slice_flags.bits.is_intra; /*Setup all the memory object*/ - gen6_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context); + if (IS_GEN7(i965->intel.device_id)) + gen7_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context); + else + gen6_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context); + gen6_vme_interface_setup(ctx, encode_state, gen6_encoder_context); gen6_vme_constant_setup(ctx, encode_state, gen6_encoder_context); gen6_vme_vme_state_setup(ctx, encode_state, is_intra, gen6_encoder_context); @@ -679,7 +953,10 @@ Bool gen6_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_co struct i965_driver_data *i965 = i965_driver_data(ctx); int i; - memcpy(vme_context->vme_kernels, gen6_vme_kernels, sizeof(vme_context->vme_kernels)); + if (IS_GEN7(i965->intel.device_id)) + memcpy(vme_context->vme_kernels, gen7_vme_kernels, sizeof(vme_context->vme_kernels)); + else + 
memcpy(vme_context->vme_kernels, gen6_vme_kernels, sizeof(vme_context->vme_kernels)); for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) { /*Load kernel into GPU memory*/ diff --git a/i965_drv_video/i965_defines.h b/i965_drv_video/i965_defines.h index 66b0b5d..a14e111 100644 --- a/i965_drv_video/i965_defines.h +++ b/i965_drv_video/i965_defines.h @@ -283,6 +283,7 @@ #define MFX_AES_STATE MFX(2, 0, 0, 5) #define MFX_STATE_POINTER MFX(2, 0, 0, 6) #define MFX_QM_STATE MFX(2, 0, 0, 7) +#define MFX_FQM_STATE MFX(2, 0, 0, 8) #define MFX_WAIT MFX(1, 0, 0, 0) diff --git a/i965_drv_video/i965_drv_video.c b/i965_drv_video/i965_drv_video.c index f6ae6a4..c5a0267 100644 --- a/i965_drv_video/i965_drv_video.c +++ b/i965_drv_video/i965_drv_video.c @@ -158,7 +158,7 @@ static struct hw_codec_info gen6_hw_codec_info = { extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, VAProfile); static struct hw_codec_info gen7_hw_codec_info = { .dec_hw_context_init = gen7_dec_hw_context_init, - .enc_hw_context_init = NULL, + .enc_hw_context_init = gen6_enc_hw_context_init, }; VAStatus diff --git a/i965_drv_video/i965_structs.h b/i965_drv_video/i965_structs.h index df59e45..12a8d14 100644 --- a/i965_drv_video/i965_structs.h +++ b/i965_drv_video/i965_structs.h @@ -1252,4 +1252,56 @@ struct gen7_sampler_state } ss3; }; +struct gen7_surface_state2 +{ + struct { + unsigned int surface_base_address; + } ss0; + + struct { + unsigned int cbcr_pixel_offset_v_direction:2; + unsigned int picture_structure:2; + unsigned int width:14; + unsigned int height:14; + } ss1; + + struct { + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int half_pitch_for_chroma:1; + unsigned int pitch:18; + unsigned int pad0:1; + unsigned int surface_object_control_data:4; + unsigned int pad1:1; + unsigned int interleave_chroma:1; + unsigned int surface_format:4; + } ss2; + + struct { + unsigned int y_offset_for_cb:15; + unsigned int pad0:1; + unsigned int x_offset_for_cb:14; + unsigned int pad1:2; 
+ } ss3; + + struct { + unsigned int y_offset_for_cr:15; + unsigned int pad0:1; + unsigned int x_offset_for_cr:14; + unsigned int pad1:2; + } ss4; + + struct { + unsigned int pad0; + } ss5; + + struct { + unsigned int pad0; + } ss6; + + struct { + unsigned int pad0; + } ss7; +}; + #endif /* _I965_STRUCTS_H_ */ diff --git a/i965_drv_video/intel_batchbuffer.c b/i965_drv_video/intel_batchbuffer.c index f52bde4..f310793 100644 --- a/i965_drv_video/intel_batchbuffer.c +++ b/i965_drv_video/intel_batchbuffer.c @@ -166,10 +166,19 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) if (batch->flag == I915_EXEC_RENDER) { BEGIN_BATCH(batch, 4); OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2); - OUT_BATCH(batch, - CMD_PIPE_CONTROL_WC_FLUSH | - CMD_PIPE_CONTROL_TC_FLUSH | - CMD_PIPE_CONTROL_NOWRITE); + + if (IS_GEN6(intel->device_id)) + OUT_BATCH(batch, + CMD_PIPE_CONTROL_WC_FLUSH | + CMD_PIPE_CONTROL_TC_FLUSH | + CMD_PIPE_CONTROL_NOWRITE); + else + OUT_BATCH(batch, + CMD_PIPE_CONTROL_WC_FLUSH | + CMD_PIPE_CONTROL_TC_FLUSH | + CMD_PIPE_CONTROL_DC_FLUSH | + CMD_PIPE_CONTROL_NOWRITE); + OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); ADVANCE_BATCH(batch); diff --git a/i965_drv_video/intel_driver.h b/i965_drv_video/intel_driver.h index a031935..e31360d 100644 --- a/i965_drv_video/intel_driver.h +++ b/i965_drv_video/intel_driver.h @@ -50,6 +50,7 @@ #define CMD_PIPE_CONTROL_IS_FLUSH (1 << 11) #define CMD_PIPE_CONTROL_TC_FLUSH (1 << 10) #define CMD_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define CMD_PIPE_CONTROL_DC_FLUSH (1 << 5) #define CMD_PIPE_CONTROL_GLOBAL_GTT (1 << 2) #define CMD_PIPE_CONTROL_LOCAL_PGTT (0 << 2) #define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) -- 2.7.4