From 172ca9a68f394c90a18d99ba06d1daf5b2d162bd Mon Sep 17 00:00:00 2001
From: "Xiang, Haihao"
Date: Thu, 12 May 2011 16:00:30 +0800
Subject: [PATCH] i965_drv_video: clean up batchbuffer interface
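
The batchbuffer macros and helpers used to take a VADriverContextP and
re-derive the batchbuffer internally, with separate *_bcs() entry points
for the BSD ring. Pass an explicit struct intel_batchbuffer * instead:
each caller looks the batch up once via intel_driver_data(ctx)->batch,
and the duplicated _bcs variants of emit_mi_flush(), data(),
check_free_space(), end_atomic() and flush() collapse into the generic
helpers (intel_batchbuffer_start_atomic_bcs() remains, but now takes the
batch as well).

A minimal sketch of the calling convention before and after -- purely
illustrative, not a hunk of this patch; every identifier in it is taken
from the diff below:

    /* before: every macro re-derived the batchbuffer from ctx */
    BEGIN_BCS_BATCH(ctx, 4);
    OUT_BCS_BATCH(ctx, MFX_PIPE_MODE_SELECT | (4 - 2));
    ADVANCE_BCS_BATCH(ctx);

    /* after: look the batch up once, then pass it explicitly */
    struct intel_driver_data *intel = intel_driver_data(ctx);
    struct intel_batchbuffer *batch = intel->batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
    ADVANCE_BCS_BATCH(batch);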

Signed-off-by: Xiang, Haihao
---
 gen6_mfc.c               |  317 ++++++++++++++-----------
 gen6_mfd.c               |  471 ++++++++++++++++++++-----------------
 gen6_vme.c               |  142 ++++++-----
 i965_avc_bsd.c           |  245 ++++++++++---------
 i965_avc_hw_scoreboard.c |  123 ++++++----
 i965_avc_ildb.c          |  120 ++++++----
 i965_defines.h           |    2 -
 i965_drv_video.c         |    8 +-
 i965_media.c             |  129 +++++-----
 i965_media_h264.c        |   16 +-
 i965_media_mpeg2.c       |   18 +-
 i965_post_processing.c   |  220 +++++++++++-------
 i965_render.c            |  595 +++++++++++++++++++++++++++--------------------
 intel_batchbuffer.c      |  404 +++++++++-----------------------
 intel_batchbuffer.h      |  129 +++++-----
 intel_driver.c           |    6 +-
 intel_driver.h           |   17 +-
 17 files changed, 1559 insertions(+), 1403 deletions(-)

diff --git a/gen6_mfc.c b/gen6_mfc.c
index 4540697..83540b4 100644
--- a/gen6_mfc.c
+++ b/gen6_mfc.c
@@ -41,10 +41,13 @@ static void
 gen6_mfc_pipe_mode_select(VADriverContextP ctx)
 {
-    BEGIN_BCS_BATCH(ctx,4);
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
 
-    OUT_BCS_BATCH(ctx, MFX_PIPE_MODE_SELECT | (4 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch,4);
+
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
+    OUT_BCS_BATCH(batch,
                   (0 << 10) | /* disable Stream-Out */
                   (1 << 9) | /* Post Deblocking Output */
                   (0 << 8) | /* Pre Deblocking Output */
@@ -52,7 +55,7 @@ gen6_mfc_pipe_mode_select(VADriverContextP ctx)
                   (0 << 5) | /* not in stitch mode */
                   (1 << 4) | /* encoding mode */
                   (2 << 0)); /* Standard Select: AVC */
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (0 << 20) | /* round flag in PB slice */
                   (0 << 19) | /* round flag in Intra8x8 */
                   (0 << 7) | /* expand NOA bus flag */
@@ -63,24 +66,26 @@ gen6_mfc_pipe_mode_select(VADriverContextP ctx)
                   (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
                   (0 << 1) | /* AVC long field motion vector */
                   (0 << 0)); /* always calculate AVC ILDB boundary strength */
-    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(batch, 0);
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
 
-    BEGIN_BCS_BATCH(ctx, 6);
+    BEGIN_BCS_BATCH(batch, 6);
 
-    OUT_BCS_BATCH(ctx, MFX_SURFACE_STATE | (6 - 2));
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch,
                   ((mfc_context->surface_state.height - 1) << 19) |
                   ((mfc_context->surface_state.width - 1) << 6));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                   (0 << 22) | /* surface object control state, FIXME??? */
@@ -88,112 +93,120 @@ gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
                   (0 << 2) | /* must be 0 for interleave U/V */
                   (1 << 1) | /* must be y-tiled */
                   (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (0 << 16) | /* must be 0 for interleave U/V */
                   (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
-    OUT_BCS_BATCH(ctx, 0);
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
     int i;
 
-    BEGIN_BCS_BATCH(ctx, 24);
+    BEGIN_BCS_BATCH(batch, 24);
 
-    OUT_BCS_BATCH(ctx, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
 
-    OUT_BCS_BATCH(ctx, 0); /* pre output addr */
+    OUT_BCS_BATCH(batch, 0); /* pre output addr */
 
-    OUT_BCS_RELOC(ctx, mfc_context->post_deblocking_output.bo,
+    OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0); /* post output addr */
 
-    OUT_BCS_RELOC(ctx, mfc_context->uncompressed_picture_source.bo,
+    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0); /* uncompressed data */
 
-    OUT_BCS_BATCH(ctx, 0); /* StreamOut data*/
-    OUT_BCS_RELOC(ctx, mfc_context->intra_row_store_scratch_buffer.bo,
+    OUT_BCS_BATCH(batch, 0); /* StreamOut data*/
+    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
-    OUT_BCS_RELOC(ctx, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
+    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
 
     /* 7..22 Reference pictures*/
     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
         if ( mfc_context->reference_surfaces[i].bo != NULL) {
-            OUT_BCS_RELOC(ctx, mfc_context->reference_surfaces[i].bo,
+            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                           0);
         } else {
-            OUT_BCS_BATCH(ctx, 0);
+            OUT_BCS_BATCH(batch, 0);
         }
     }
 
-    OUT_BCS_BATCH(ctx, 0); /* no block status */
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0); /* no block status */
+
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
 
-    BEGIN_BCS_BATCH(ctx, 11);
+    BEGIN_BCS_BATCH(batch, 11);
 
-    OUT_BCS_BATCH(ctx, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
     /* MFX Indirect MV Object Base Address */
-    OUT_BCS_RELOC(ctx, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
 
-    BEGIN_BCS_BATCH(ctx, 4);
+    BEGIN_BCS_BATCH(batch, 4);
 
-    OUT_BCS_BATCH(ctx, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
-    OUT_BCS_RELOC(ctx, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
 
-    BEGIN_BCS_BATCH(ctx, 13);
-    OUT_BCS_BATCH(ctx, MFX_AVC_IMG_STATE | (13 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 13);
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
+    OUT_BCS_BATCH(batch,
                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (height_in_mbs << 16) |
                   (width_in_mbs << 0));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (0 << 24) | /*Second Chroma QP Offset*/
                   (0 << 16) | /*Chroma QP Offset*/
                   (0 << 14) | /*Max-bit conformance Intra flag*/
@@ -202,7 +215,7 @@ gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
                   (0 << 10) | /*QM Preset FLag */
                   (0 << 8) | /*Image Structure*/
                   (0 << 0) ); /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (0 << 16) | /*Mininum Frame size*/
                   (0 << 15) | /*Disable reading of Macroblock Status Buffer*/
                   (0 << 14) | /*Load BitStream Pointer only once, 1 slic 1 frame*/
@@ -217,83 +230,87 @@ gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
                   (1 << 2) | /*Frame MB only flag*/
                   (0 << 1) | /*MBAFF mode is in active*/
                   (0 << 0) ); /*Field picture flag*/
-    OUT_BCS_BATCH(ctx, 0); /*Mainly about MB rate control and debug, just ignoring*/
-    OUT_BCS_BATCH(ctx, /*Inter and Intra Conformance Max size limit*/
+    OUT_BCS_BATCH(batch, 0); /*Mainly about MB rate control and debug, just ignoring*/
+    OUT_BCS_BATCH(batch, /*Inter and Intra Conformance Max size limit*/
                   (0xBB8 << 16) | /*InterMbMaxSz*/
                   (0xEE8) ); /*IntraMbMaxSz*/
-    OUT_BCS_BATCH(ctx, 0); /*Reserved*/
-    OUT_BCS_BATCH(ctx, 0); /*Slice QP Delta for bitrate control*/
-    OUT_BCS_BATCH(ctx, 0); /*Slice QP Delta for bitrate control*/
-    OUT_BCS_BATCH(ctx, 0x8C000000);
-    OUT_BCS_BATCH(ctx, 0x00010000);
-    OUT_BCS_BATCH(ctx, 0);
-
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0); /*Reserved*/
+    OUT_BCS_BATCH(batch, 0); /*Slice QP Delta for bitrate control*/
+    OUT_BCS_BATCH(batch, 0); /*Slice QP Delta for bitrate control*/
+    OUT_BCS_BATCH(batch, 0x8C000000);
+    OUT_BCS_BATCH(batch, 0x00010000);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int i;
 
-    BEGIN_BCS_BATCH(ctx, 69);
+    BEGIN_BCS_BATCH(batch, 69);
 
-    OUT_BCS_BATCH(ctx, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
     //TODO: reference DMV
     for(i = 0; i < 16; i++){
-        OUT_BCS_BATCH(ctx, 0);
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
     }
 
     //TODO: current DMV just for test
#if 0
-    OUT_BCS_RELOC(ctx, mfc_context->direct_mv_buffers[0].bo,
+    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
#else
     //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
-    //OUT_BCS_BATCH(ctx, mfc_context->direct_mv_buffers[0].bo->offset);
-    OUT_BCS_BATCH(ctx, 0);
+    //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
+    OUT_BCS_BATCH(batch, 0);
#endif
 
-    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(batch, 0);
 
     //TODO: POL list
     for(i = 0; i < 34; i++) {
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
     }
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
                                      int intra_slice,
                                      struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
 
-    BEGIN_BCS_BATCH(ctx, 11);;
+    BEGIN_BCS_BATCH(batch, 11);;
 
-    OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2) );
+    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
 
     if ( intra_slice )
-        OUT_BCS_BATCH(ctx, 2); /*Slice Type: I Slice*/
+        OUT_BCS_BATCH(batch, 2); /*Slice Type: I Slice*/
     else
-        OUT_BCS_BATCH(ctx, 0); /*Slice Type: P Slice*/
+        OUT_BCS_BATCH(batch, 0); /*Slice Type: P Slice*/
 
     if ( intra_slice )
-        OUT_BCS_BATCH(ctx, 0); /*no reference frames and pred_weight_table*/
+        OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
     else
-        OUT_BCS_BATCH(ctx, 0x00010000); /*1 reference frame*/
+        OUT_BCS_BATCH(batch, 0x00010000); /*1 reference frame*/
 
-    OUT_BCS_BATCH(ctx, (0<<24) | /*Enable deblocking operation*/
+    OUT_BCS_BATCH(batch, (0<<24) | /*Enable deblocking operation*/
                   (26<<16) | /*Slice Quantization Parameter*/
                   0x0202 );
-    OUT_BCS_BATCH(ctx, 0); /*First MB X&Y , the postion of current slice*/
-    OUT_BCS_BATCH(ctx, ( ((mfc_context->surface_state.height+15)/16) << 16) );
+    OUT_BCS_BATCH(batch, 0); /*First MB X&Y , the postion of current slice*/
+    OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
 
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (0<<31) | /*RateControlCounterEnable = disable*/
                   (1<<30) | /*ResetRateControlCounter*/
                   (2<<28) | /*RC Triggle Mode = Loose Rate Control*/
@@ -305,91 +322,102 @@ static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
                   (1<<13) | /*RBSP NAL TYPE*/
                   (0<<12) ); /*CabacZeroWordInsertionEnable*/
 
-    OUT_BCS_RELOC(ctx, mfc_context->mfc_indirect_pak_bse_object.bo,
+    OUT_BCS_RELOC(batch, mfc_context->mfc_indirect_pak_bse_object.bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   mfc_context->mfc_indirect_pak_bse_object.offset);
 
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void gen6_mfc_avc_qm_state(VADriverContextP ctx)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int i;
 
-    BEGIN_BCS_BATCH(ctx, 58);
+    BEGIN_BCS_BATCH(batch, 58);
 
-    OUT_BCS_BATCH(ctx, MFX_AVC_QM_STATE | 56);
-    OUT_BCS_BATCH(ctx, 0xFF ) ;
+    OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
+    OUT_BCS_BATCH(batch, 0xFF ) ;
     for( i = 0; i < 56; i++) {
-        OUT_BCS_BATCH(ctx, 0x10101010);
+        OUT_BCS_BATCH(batch, 0x10101010);
     }
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int i;
 
-    BEGIN_BCS_BATCH(ctx, 113);
-    OUT_BCS_BATCH(ctx, MFC_AVC_FQM_STATE | (113 - 2));
+    BEGIN_BCS_BATCH(batch, 113);
+    OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
 
     for(i = 0; i < 112;i++) {
-        OUT_BCS_BATCH(ctx, 0x10001000);
+        OUT_BCS_BATCH(batch, 0x10001000);
    }
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int i;
 
-    BEGIN_BCS_BATCH(ctx, 10);
+    BEGIN_BCS_BATCH(batch, 10);
 
-    OUT_BCS_BATCH(ctx, MFX_AVC_REF_IDX_STATE | 8);
-    OUT_BCS_BATCH(ctx, 0); //Select L0
+    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
+    OUT_BCS_BATCH(batch, 0); //Select L0
 
-    OUT_BCS_BATCH(ctx, 0x80808000); //Only 1 reference
+    OUT_BCS_BATCH(batch, 0x80808000); //Only 1 reference
     for(i = 0; i < 7; i++) {
-        OUT_BCS_BATCH(ctx, 0x80808080);
+        OUT_BCS_BATCH(batch, 0x80808080);
     }
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfc_avc_insert_object(VADriverContextP ctx, int flush_data)
 {
-    BEGIN_BCS_BATCH(ctx, 4);
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
+
+    BEGIN_BCS_BATCH(batch, 4);
 
-    OUT_BCS_BATCH(ctx, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
-    OUT_BCS_BATCH(ctx, (32<<8) |
+    OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
+    OUT_BCS_BATCH(batch, (32<<8) |
                   (1 << 3) |
                   (1 << 2) |
                   (flush_data << 1) |
                   (1<<0) );
-    OUT_BCS_BATCH(ctx, 0x00000003);
-    OUT_BCS_BATCH(ctx, 0xABCD1234);
+    OUT_BCS_BATCH(batch, 0x00000003);
+    OUT_BCS_BATCH(batch, 0xABCD1234);
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static int
 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int len_in_dwords = 11;
 
-    BEGIN_BCS_BATCH(ctx, len_in_dwords);
+    BEGIN_BCS_BATCH(batch, len_in_dwords);
 
-    OUT_BCS_BATCH(ctx, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch,
                   (0 << 24) | /* PackedMvNum, Debug*/
                   (0 << 20) | /* No motion vector */
                   (1 << 19) | /* CbpDcY */
@@ -397,34 +425,36 @@ gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, in
                   (1 << 17) | /* CbpDcV */
                   (msg[0] & 0xFFFF) );
 
-    OUT_BCS_BATCH(ctx, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
-    OUT_BCS_BATCH(ctx, 0x000F000F); /* Code Block Pattern */
-    OUT_BCS_BATCH(ctx, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
+    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
+    OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
+    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
 
     /*Stuff for Intra MB*/
-    OUT_BCS_BATCH(ctx, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
-    OUT_BCS_BATCH(ctx, msg[2]);
-    OUT_BCS_BATCH(ctx, msg[3]&0xFC);
+    OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
+    OUT_BCS_BATCH(batch, msg[2]);
+    OUT_BCS_BATCH(batch, msg[3]&0xFC);
 
-    OUT_BCS_BATCH(ctx, 0x8040000); /*MaxSizeInWord and TargetSzieInWord*/
+    OUT_BCS_BATCH(batch, 0x8040000); /*MaxSizeInWord and TargetSzieInWord*/
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 
     return len_in_dwords;
 }
 
 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int len_in_dwords = 11;
 
-    BEGIN_BCS_BATCH(ctx, len_in_dwords);
+    BEGIN_BCS_BATCH(batch, len_in_dwords);
 
-    OUT_BCS_BATCH(ctx, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
 
-    OUT_BCS_BATCH(ctx, 32); /* 32 MV*/
-    OUT_BCS_BATCH(ctx, offset);
+    OUT_BCS_BATCH(batch, 32); /* 32 MV*/
+    OUT_BCS_BATCH(batch, offset);
 
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (1 << 24) | /* PackedMvNum, Debug*/
                   (4 << 20) | /* 8 MV, SNB don't use it*/
                   (1 << 19) | /* CbpDcY */
@@ -441,18 +471,18 @@ static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int
                   (0 << 2) | /* SkipMbFlag */
                   (0 << 0)); /* InterMbMode */
 
-    OUT_BCS_BATCH(ctx, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
-    OUT_BCS_BATCH(ctx, 0x000F000F); /* Code Block Pattern */
-    OUT_BCS_BATCH(ctx, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
+    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
+    OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
+    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
 
     /*Stuff for Inter MB*/
-    OUT_BCS_BATCH(ctx, 0x0);
-    OUT_BCS_BATCH(ctx, 0x0);
-    OUT_BCS_BATCH(ctx, 0x0);
+    OUT_BCS_BATCH(batch, 0x0);
+    OUT_BCS_BATCH(batch, 0x0);
+    OUT_BCS_BATCH(batch, 0x0);
 
-    OUT_BCS_BATCH(ctx, 0xF0020000); /*MaxSizeInWord and TargetSzieInWord*/
+    OUT_BCS_BATCH(batch, 0xF0020000); /*MaxSizeInWord and TargetSzieInWord*/
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 
     return len_in_dwords;
 }
@@ -517,6 +547,8 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
                                       struct encode_state *encode_state,
                                       struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
@@ -528,7 +560,7 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
     int x,y;
 
-    intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
 
     if (is_intra) {
         dri_bo_map(vme_context->vme_output.bo , 1);
@@ -541,7 +573,7 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
             int qp = pSequenceParameter->initial_qp;
 
             if (emit_new_state) {
-                intel_batchbuffer_emit_mi_flush_bcs(ctx);
+                intel_batchbuffer_emit_mi_flush(batch);
                 gen6_mfc_pipe_mode_select(ctx);
                 gen6_mfc_surface_state(ctx, gen6_encoder_context);
                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
@@ -566,11 +598,11 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
                 offset += 64;
             }
 
-            if (intel_batchbuffer_check_free_space_bcs(ctx, object_len_in_bytes) == 0) {
-                intel_batchbuffer_end_atomic_bcs(ctx);
-                intel_batchbuffer_flush_bcs(ctx);
+            if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
+                intel_batchbuffer_end_atomic(batch);
+                intel_batchbuffer_flush(batch);
                 emit_new_state = 1;
-                intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
+                intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
             }
         }
     }
@@ -578,7 +610,7 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
     if (is_intra)
         dri_bo_unmap(vme_context->vme_output.bo);
 
-    intel_batchbuffer_end_atomic_bcs(ctx);
+    intel_batchbuffer_end_atomic(batch);
 }
 
 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
@@ -651,7 +683,10 @@ static VAStatus gen6_mfc_run(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct gen6_encoder_context *gen6_encoder_context)
 {
-    intel_batchbuffer_flush_bcs(ctx); //run the pipeline
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
+
+    intel_batchbuffer_flush(batch); //run the pipeline
 
     return VA_STATUS_SUCCESS;
 }
diff --git a/gen6_mfd.c b/gen6_mfd.c
index 78baaa4..bef3cac 100644
--- a/gen6_mfd.c
+++ b/gen6_mfd.c
@@ -242,13 +242,16 @@ gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                           int standard_select,
                           struct gen6_mfd_context *gen6_mfd_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
+
     assert(standard_select == MFX_FORMAT_MPEG2 ||
            standard_select == MFX_FORMAT_AVC ||
            standard_select == MFX_FORMAT_VC1);
 
-    BEGIN_BCS_BATCH(ctx, 4);
-    OUT_BCS_BATCH(ctx, MFX_PIPE_MODE_SELECT | (4 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
+    OUT_BCS_BATCH(batch,
                   (MFD_MODE_VLD << 16) | /* VLD mode */
                   (0 << 10) | /* disable Stream-Out */
                   (gen6_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
@@ -257,7 +260,7 @@ gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                   (0 << 5) | /* not in stitch mode */
                   (MFX_CODEC_DECODE << 4) | /* decoding mode */
                   (standard_select << 0));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (0 << 20) | /* round flag in PB slice */
                   (0 << 19) | /* round flag in Intra8x8 */
                   (0 << 7) | /* expand NOA bus flag */
@@ -268,8 +271,8 @@ gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                   (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
                   (0 << 1) | /* AVC long field motion vector */
                   (1 << 0)); /* always calculate AVC ILDB boundary strength */
-    OUT_BCS_BATCH(ctx, 0);
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -277,17 +280,19 @@ gen6_mfd_surface_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        int standard_select)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
     assert(obj_surface);
 
-    BEGIN_BCS_BATCH(ctx, 6);
-    OUT_BCS_BATCH(ctx, MFX_SURFACE_STATE | (6 - 2));
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch,
                   ((obj_surface->orig_height - 1) << 19) |
                   ((obj_surface->orig_width - 1) << 6));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                   (0 << 22) | /* surface object control state, FIXME??? */
@@ -295,11 +300,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
                   (0 << 2) | /* must be 0 for interleave U/V */
                   (1 << 1) | /* must be y-tiled */
                   (I965_TILEWALK_YMAJOR << 0)); /* tile walk, FIXME: must be 1 ??? */
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (0 << 16) | /* must be 0 for interleave U/V */
                   (obj_surface->height)); /* y offset for U(cb) */
-    OUT_BCS_BATCH(ctx, 0);
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -308,41 +313,43 @@ gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                              int standard_select,
                              struct gen6_mfd_context *gen6_mfd_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     int i;
 
-    BEGIN_BCS_BATCH(ctx, 24);
-    OUT_BCS_BATCH(ctx, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
+    BEGIN_BCS_BATCH(batch, 24);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
 
     if (gen6_mfd_context->pre_deblocking_output.valid)
-        OUT_BCS_RELOC(ctx, gen6_mfd_context->pre_deblocking_output.bo,
+        OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
     else
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
 
     if (gen6_mfd_context->post_deblocking_output.valid)
-        OUT_BCS_RELOC(ctx, gen6_mfd_context->post_deblocking_output.bo,
+        OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
     else
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
 
-    OUT_BCS_BATCH(ctx, 0); /* ignore for decoding */
-    OUT_BCS_BATCH(ctx, 0); /* ignore for decoding */
+    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
+    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
 
     if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
-        OUT_BCS_RELOC(ctx, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
+        OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
     else
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
 
     if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
-        OUT_BCS_RELOC(ctx, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+        OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
     else
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
 
     /* DW 7..22 */
     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
@@ -352,16 +359,16 @@ gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
             obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
             assert(obj_surface && obj_surface->bo);
 
-            OUT_BCS_RELOC(ctx, obj_surface->bo,
+            OUT_BCS_RELOC(batch, obj_surface->bo,
                           I915_GEM_DOMAIN_INSTRUCTION, 0,
                           0);
         } else {
-            OUT_BCS_BATCH(ctx, 0);
+            OUT_BCS_BATCH(batch, 0);
         }
     }
 
-    OUT_BCS_BATCH(ctx, 0); /* ignore DW23 for decoding */
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0); /* ignore DW23 for decoding */
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -369,19 +376,22 @@ gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                  dri_bo *slice_data_bo,
                                  int standard_select)
 {
-    BEGIN_BCS_BATCH(ctx, 11);
-    OUT_BCS_BATCH(ctx, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
-    OUT_BCS_RELOC(ctx, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
-    OUT_BCS_BATCH(ctx, 0);
-    ADVANCE_BCS_BATCH(ctx);
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
+
+    BEGIN_BCS_BATCH(batch, 11);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -390,31 +400,34 @@ gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                  int standard_select,
                                  struct gen6_mfd_context *gen6_mfd_context)
 {
-    BEGIN_BCS_BATCH(ctx, 4);
-    OUT_BCS_BATCH(ctx, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
 
     if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
-        OUT_BCS_RELOC(ctx, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+        OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
    else
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
 
     if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
-        OUT_BCS_RELOC(ctx, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
+        OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
     else
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
 
     if (gen6_mfd_context->bitplane_read_buffer.valid)
-        OUT_BCS_RELOC(ctx, gen6_mfd_context->bitplane_read_buffer.bo,
+        OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, 0,
                       0);
     else
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -430,14 +443,19 @@ gen6_mfd_wait(VADriverContextP ctx,
               struct decode_state *decode_state,
               int standard_select)
 {
-    BEGIN_BCS_BATCH(ctx, 1);
-    OUT_BCS_BATCH(ctx, MFX_WAIT | (1 << 8));
-    ADVANCE_BCS_BATCH(ctx);
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
+
+    BEGIN_BCS_BATCH(batch, 1);
+    OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfd_avc_img_state(VADriverContextP ctx, struct decode_state *decode_state)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int qm_present_flag;
     int img_struct;
     int mbaff_frame_flag;
@@ -485,14 +503,14 @@ gen6_mfd_avc_img_state(VADriverContextP ctx, struct decode_state *decode_state)
            pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
 
-    BEGIN_BCS_BATCH(ctx, 13);
-    OUT_BCS_BATCH(ctx, MFX_AVC_IMG_STATE | (13 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 13);
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
+    OUT_BCS_BATCH(batch,
                   ((width_in_mbs * height_in_mbs) & 0x7fff));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (height_in_mbs << 16) |
                   (width_in_mbs << 0));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
@@ -501,7 +519,7 @@ gen6_mfd_avc_img_state(VADriverContextP ctx, struct decode_state *decode_state)
                   (qm_present_flag << 10) |
                   (img_struct << 8) |
                   (16 << 0));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
@@ -511,20 +529,22 @@ gen6_mfd_avc_img_state(VADriverContextP ctx, struct decode_state *decode_state)
                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                   (mbaff_frame_flag << 1) |
                   (pic_param->pic_fields.bits.field_pic_flag << 0));
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfd_avc_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int cmd_len;
     VAIQMatrixBufferH264 *iq_matrix;
     VAPictureParameterBufferH264 *pic_param;
@@ -542,24 +562,24 @@ gen6_mfd_avc_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
         cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
 
-    BEGIN_BCS_BATCH(ctx, cmd_len);
-    OUT_BCS_BATCH(ctx, MFX_AVC_QM_STATE | (cmd_len - 2));
+    BEGIN_BCS_BATCH(batch, cmd_len);
+    OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));
 
     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
-        OUT_BCS_BATCH(ctx,
+        OUT_BCS_BATCH(batch,
                       (0x0 << 8) | /* don't use default built-in matrices */
                       (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
     else
-        OUT_BCS_BATCH(ctx,
+        OUT_BCS_BATCH(batch,
                       (0x0 << 8) | /* don't use default built-in matrices */
                       (0x3f << 0)); /* six 4x4 scaling matrices */
 
-    intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
+    intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
 
     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
-        intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
+        intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -568,14 +588,16 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx,
                               VASliceParameterBufferH264 *slice_param,
                               struct gen6_mfd_context *gen6_mfd_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface;
     struct gen6_avc_surface *gen6_avc_surface;
     VAPictureH264 *va_pic;
     int i, j;
 
-    BEGIN_BCS_BATCH(ctx, 69);
-    OUT_BCS_BATCH(ctx, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+    BEGIN_BCS_BATCH(batch, 69);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
 
     /* reference surfaces 0..15 */
     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
@@ -585,25 +607,25 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx,
             gen6_avc_surface = obj_surface->private_data;
 
             if (gen6_avc_surface == NULL) {
-                OUT_BCS_BATCH(ctx, 0);
-                OUT_BCS_BATCH(ctx, 0);
+                OUT_BCS_BATCH(batch, 0);
+                OUT_BCS_BATCH(batch, 0);
             } else {
-                OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_top,
+                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                               I915_GEM_DOMAIN_INSTRUCTION, 0,
                               0);
 
                 if (gen6_avc_surface->dmv_bottom_flag == 1)
-                    OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_bottom,
+                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
                                   0);
                 else
-                    OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_top,
+                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
                                   0);
             }
         } else {
-            OUT_BCS_BATCH(ctx, 0);
-            OUT_BCS_BATCH(ctx, 0);
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
         }
     }
 
@@ -614,16 +636,16 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx,
     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
     gen6_avc_surface = obj_surface->private_data;
 
-    OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_top,
+    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
 
     if (gen6_avc_surface->dmv_bottom_flag == 1)
-        OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_bottom,
+        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
     else
-        OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_top,
+        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
 
@@ -646,19 +668,19 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx,
             assert(found == 1);
             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
 
-            OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
-            OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
+            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
         } else {
-            OUT_BCS_BATCH(ctx, 0);
-            OUT_BCS_BATCH(ctx, 0);
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
         }
     }
 
     va_pic = &pic_param->CurrPic;
-    OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
-    OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
+    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -667,6 +689,8 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx,
                          VASliceParameterBufferH264 *slice_param,
                          VASliceParameterBufferH264 *next_slice_param)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
@@ -717,15 +741,15 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx,
         next_slice_ver_pos = height_in_mbs;
     }
 
-    BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? */
-    OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2));
-    OUT_BCS_BATCH(ctx, slice_type);
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
+    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
+    OUT_BCS_BATCH(batch, slice_type);
+    OUT_BCS_BATCH(batch,
                   (num_ref_idx_l1 << 24) |
                   (num_ref_idx_l0 << 16) |
                   (slice_param->chroma_log2_weight_denom << 8) |
                   (slice_param->luma_log2_weight_denom << 0));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (weighted_pred_idc << 30) |
                   (slice_param->direct_spatial_mv_pred_flag << 29) |
                   (slice_param->disable_deblocking_filter_idc << 27) |
@@ -733,43 +757,45 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx,
                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (slice_ver_pos << 24) |
                   (slice_hor_pos << 16) |
                   (first_mb_in_slice << 0));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (next_slice_ver_pos << 16) |
                   (next_slice_hor_pos << 0));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (next_slice_param == NULL) << 19); /* last slice flag */
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
 
-    BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? */
-    OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2));
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
+    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch,
                   height_in_mbs << 24 |
                   width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -778,6 +804,8 @@ gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
                            VASliceParameterBufferH264 *slice_param,
                            struct gen6_mfd_context *gen6_mfd_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int i, j, num_ref_list;
     struct {
         unsigned char bottom_idc:1;
@@ -807,9 +835,9 @@ gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
             va_pic = slice_param->RefPicList1;
         }
 
-        BEGIN_BCS_BATCH(ctx, 10);
-        OUT_BCS_BATCH(ctx, MFX_AVC_REF_IDX_STATE | (10 - 2));
-        OUT_BCS_BATCH(ctx, i);
+        BEGIN_BCS_BATCH(batch, 10);
+        OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | (10 - 2));
+        OUT_BCS_BATCH(batch, i);
 
         for (j = 0; j < 32; j++) {
             if (va_pic->flags & VA_PICTURE_H264_INVALID) {
@@ -843,8 +871,8 @@ gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
             va_pic++;
         }
 
-        intel_batchbuffer_data_bcs(ctx, refs, sizeof(refs));
-        ADVANCE_BCS_BATCH(ctx);
+        intel_batchbuffer_data(batch, refs, sizeof(refs));
+        ADVANCE_BCS_BATCH(batch);
     }
 }
 
@@ -853,6 +881,8 @@ gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *slice_param)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int i, j, num_weight_offset_table = 0;
     short weightoffsets[32 * 6];
 
@@ -868,9 +898,9 @@ gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
     }
 
     for (i = 0; i < num_weight_offset_table; i++) {
-        BEGIN_BCS_BATCH(ctx, 98);
-        OUT_BCS_BATCH(ctx, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
-        OUT_BCS_BATCH(ctx, i);
+        BEGIN_BCS_BATCH(batch, 98);
+        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
+        OUT_BCS_BATCH(batch, i);
 
         if (i == 0) {
             for (j = 0; j < 32; j++) {
@@ -892,8 +922,8 @@ gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
             }
         }
 
-        intel_batchbuffer_data_bcs(ctx, weightoffsets, sizeof(weightoffsets));
-        ADVANCE_BCS_BATCH(ctx);
+        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
+        ADVANCE_BCS_BATCH(batch);
     }
 }
 
@@ -924,6 +954,8 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                         VASliceParameterBufferH264 *slice_param,
                         dri_bo *slice_data_bo)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int slice_data_bit_offset;
     uint8_t *slice_data = NULL;
 
@@ -934,36 +966,39 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                                                      slice_param->slice_data_bit_offset);
     dri_bo_unmap(slice_data_bo);
 
-    BEGIN_BCS_BATCH(ctx, 6);
-    OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
+    OUT_BCS_BATCH(batch,
                   ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
-    OUT_BCS_BATCH(ctx, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
+    OUT_BCS_BATCH(batch,
                   (0 << 31) |
                   (0 << 14) |
                   (0 << 12) |
                   (0 << 10) |
                   (0 << 8));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (0 << 16) |
                   (0 << 6) |
                   ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
-    OUT_BCS_BATCH(ctx, 0);
-    ADVANCE_BCS_BATCH(ctx);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
                                       VAPictureParameterBufferH264 *pic_param)
 {
-    BEGIN_BCS_BATCH(ctx, 6);
-    OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2));
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    ADVANCE_BCS_BATCH(ctx);
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -1092,6 +1127,8 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen6_mfd_context *gen6_mfd_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     VAPictureParameterBufferH264 *pic_param;
     VASliceParameterBufferH264 *slice_param, *next_slice_param;
     dri_bo *slice_data_bo;
@@ -1101,8 +1138,8 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx,
     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
     gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);
 
-    intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
-    intel_batchbuffer_emit_mi_flush_bcs(ctx);
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC);
     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
@@ -1144,8 +1181,8 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx,
     }
 
     gen6_mfd_avc_phantom_slice(ctx, pic_param);
-    intel_batchbuffer_end_atomic_bcs(ctx);
-    intel_batchbuffer_flush_bcs(ctx);
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
 }
 
 static void
@@ -1225,14 +1262,16 @@ gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
 static void
 gen6_mfd_mpeg2_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     VAPictureParameterBufferMPEG2 *pic_param;
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
 
-    BEGIN_BCS_BATCH(ctx, 4);
-    OUT_BCS_BATCH(ctx, MFX_MPEG2_PIC_STATE | (4 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
+    OUT_BCS_BATCH(batch,
                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
@@ -1245,17 +1284,19 @@ gen6_mfd_mpeg2_pic_state(VADriverContextP ctx, struct decode_state *decode_state
                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   pic_param->picture_coding_type << 9);
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
                   (ALIGN(pic_param->horizontal_size, 16) / 16));
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     VAIQMatrixBufferMPEG2 *iq_matrix;
     int i;
 
@@ -1288,11 +1329,11 @@ gen6_mfd_mpeg2_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
             qmx[m] = qm[k];
         }
 
-        BEGIN_BCS_BATCH(ctx, 18);
-        OUT_BCS_BATCH(ctx, MFX_MPEG2_QM_STATE | (18 - 2));
-        OUT_BCS_BATCH(ctx, i);
-        intel_batchbuffer_data_bcs(ctx, qmx, 64);
-        ADVANCE_BCS_BATCH(ctx);
+        BEGIN_BCS_BATCH(batch, 18);
+        OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
+        OUT_BCS_BATCH(batch, i);
+        intel_batchbuffer_data(batch, qmx, 64);
+        ADVANCE_BCS_BATCH(batch);
     }
 }
 
@@ -1302,6 +1343,8 @@ gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                           VASliceParameterBufferMPEG2 *slice_param,
                           VASliceParameterBufferMPEG2 *next_slice_param)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
     unsigned int height_in_mbs = ALIGN(pic_param->vertical_size, 16) / 16;
     int mb_count;
@@ -1313,22 +1356,22 @@ gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
         mb_count = (next_slice_param->slice_vertical_position * width_in_mbs + next_slice_param->slice_horizontal_position) -
                    (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position);
 
-    BEGIN_BCS_BATCH(ctx, 5);
-    OUT_BCS_BATCH(ctx, MFD_MPEG2_BSD_OBJECT | (5 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_horizontal_position << 24 |
                   slice_param->slice_vertical_position << 16 |
                   mb_count << 8 |
                   (next_slice_param == NULL) << 5 |
                   (next_slice_param == NULL) << 3 |
                   (slice_param->macroblock_offset & 0x7));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   slice_param->quantiser_scale_code << 24);
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -1336,6 +1379,8 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen6_mfd_context *gen6_mfd_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     VAPictureParameterBufferMPEG2 *pic_param;
     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param;
     dri_bo *slice_data_bo;
@@ -1345,8 +1390,8 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
 
     gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
-    intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
-    intel_batchbuffer_emit_mi_flush_bcs(ctx);
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2);
     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
@@ -1374,8 +1419,8 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
         }
     }
 
-    intel_batchbuffer_end_atomic_bcs(ctx);
-    intel_batchbuffer_flush_bcs(ctx);
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
 }
 
 static const int va_to_gen6_vc1_pic_type[5] = {
@@ -1610,6 +1655,8 @@ gen6_mfd_vc1_decode_init(VADriverContextP ctx,
 static void
 gen6_mfd_vc1_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     VAPictureParameterBufferVC1 *pic_param;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface;
@@ -1760,12 +1807,12 @@ gen6_mfd_vc1_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
     assert(pic_param->conditional_overlap_flag < 3);
     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
 
-    BEGIN_BCS_BATCH(ctx, 6);
-    OUT_BCS_BATCH(ctx, MFX_VC1_PIC_STATE | (6 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch,
                   (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
                   (ALIGN(pic_param->coded_width, 16) / 16));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   pic_param->sequence_fields.bits.syncmarker << 31 |
                   1 << 29 | /* concealment */
                   alt_pq << 24 |
@@ -1781,7 +1828,7 @@ gen6_mfd_vc1_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
                  !pic_param->picture_fields.bits.is_first_field << 5 |
                   picture_type << 2 |
                   fcm << 0);
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   !!pic_param->bitplane_present.value << 23 |
                   !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
@@ -1798,7 +1845,7 @@ gen6_mfd_vc1_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
                   ref_field_pic_polarity << 6 |
                   pic_param->reference_fields.bits.num_reference_pictures << 5 |
                   pic_param->reference_fields.bits.reference_distance << 0);
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   scale_factor << 24 |
                   pic_param->mv_fields.bits.mv_table << 20 |
                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
@@ -1810,16 +1857,18 @@ gen6_mfd_vc1_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                   pic_param->cbp_table << 0);
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   dmv_surface_valid << 13 |
                   brfd << 8 |
                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx, struct decode_state *decode_state)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     VAPictureParameterBufferVC1 *pic_param;
     int interpolation_mode = 0;
    int intensitycomp_single;
@@ -1842,19 +1891,19 @@ gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx, struct decode_state *decode_s
     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
 
-    BEGIN_BCS_BATCH(ctx, 7);
-    OUT_BCS_BATCH(ctx, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 7);
+    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
+    OUT_BCS_BATCH(batch,
                   0 << 8 | /* FIXME: interlace mode */
                   pic_param->rounding_control << 4 |
                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   pic_param->luma_shift << 16 |
                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx, 0);
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch,
                   interpolation_mode << 19 |
                   pic_param->fast_uvmc_flag << 18 |
                   0 << 17 | /* FIXME: scale up or down ??? */
@@ -1863,13 +1912,15 @@ gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx, struct decode_state *decode_s
                   0 << 4 |
                   intensitycomp_single << 2 |
                   intensitycomp_single << 0);
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
 gen6_mfd_vc1_directmode_state(VADriverContextP ctx, struct decode_state *decode_state)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     VAPictureParameterBufferVC1 *pic_param;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface;
@@ -1890,24 +1941,24 @@ gen6_mfd_vc1_directmode_state(VADriverContextP ctx, struct decode_state *decode_
         dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
     }
 
-    BEGIN_BCS_BATCH(ctx, 3);
-    OUT_BCS_BATCH(ctx, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
+    BEGIN_BCS_BATCH(batch, 3);
+    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
 
     if (dmv_write_buffer)
-        OUT_BCS_RELOC(ctx, dmv_write_buffer,
+        OUT_BCS_RELOC(batch, dmv_write_buffer,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
     else
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
 
     if (dmv_read_buffer)
-        OUT_BCS_RELOC(ctx, dmv_read_buffer,
+        OUT_BCS_RELOC(batch, dmv_read_buffer,
                       I915_GEM_DOMAIN_INSTRUCTION, 0,
                       0);
     else
-        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(batch, 0);
 
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static int
@@ -1939,6 +1990,8 @@ gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
                         VASliceParameterBufferVC1 *next_slice_param,
                         dri_bo *slice_data_bo)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     int next_slice_start_vert_pos;
     int macroblock_offset;
     uint8_t *slice_data = NULL;
@@ -1955,17 +2008,17 @@ gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
     else
         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
 
-    BEGIN_BCS_BATCH(ctx, 4);
-    OUT_BCS_BATCH(ctx, MFD_VC1_BSD_OBJECT | (4 - 2));
-    OUT_BCS_BATCH(ctx,
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_data_size - (macroblock_offset >> 3));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_data_offset + (macroblock_offset >> 3));
-    OUT_BCS_BATCH(ctx,
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_vertical_position << 24 |
                   next_slice_start_vert_pos << 16 |
                   (macroblock_offset & 0x7));
-    ADVANCE_BCS_BATCH(ctx);
+    ADVANCE_BCS_BATCH(batch);
 }
 
 static void
@@ -1973,6 +2026,8 @@ gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen6_mfd_context *gen6_mfd_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     VAPictureParameterBufferVC1 *pic_param;
     VASliceParameterBufferVC1 *slice_param, *next_slice_param;
     dri_bo *slice_data_bo;
@@ -1982,8 +2037,8 @@ gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
 
     gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
-    intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
-    intel_batchbuffer_emit_mi_flush_bcs(ctx);
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1);
     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
@@ -2012,8 +2067,8 @@ gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
         }
     }
 
-    intel_batchbuffer_end_atomic_bcs(ctx);
-    intel_batchbuffer_flush_bcs(ctx);
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
 }
 
 static void
diff --git a/gen6_vme.c b/gen6_vme.c
index 1273ff7..2795918 100644
--- a/gen6_vme.c
+++ b/gen6_vme.c
@@ -436,85 +436,97 @@ static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx,
 static void gen6_vme_pipeline_select(VADriverContextP ctx)
 {
-    BEGIN_BATCH(ctx, 1);
-    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
-    ADVANCE_BATCH(ctx);
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
+
+    BEGIN_BATCH(batch, 1);
+    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+    ADVANCE_BATCH(batch);
 }
 
 static void gen6_vme_state_base_address(VADriverContextP ctx)
 {
-    BEGIN_BATCH(ctx, 10);
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
+
+    BEGIN_BATCH(batch, 10);
 
-    OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 8);
+    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 8);
 
-    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
-    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //Surface State Base Address
-    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //Dynamic State Base Address
-    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //Indirect Object Base Address
-    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //Instruction Base Address
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Surface State Base Address
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Dynamic State Base Address
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Indirect Object Base Address
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Instruction Base Address
 
-    OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
-    OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
-    OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
-    OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound
 
     /*
-    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address
-    OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound
     */
 
-    ADVANCE_BATCH(ctx);
+    ADVANCE_BATCH(batch);
 }
 
 static void gen6_vme_vfe_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
 
-    BEGIN_BATCH(ctx, 8);
+    BEGIN_BATCH(batch, 8);
 
-    OUT_BATCH(ctx, CMD_MEDIA_VFE_STATE | 6); /*Gen6 CMD_MEDIA_STATE_POINTERS = CMD_MEDIA_STATE */
-    OUT_BATCH(ctx, 0); /*Scratch Space Base Pointer and Space*/
-    OUT_BATCH(ctx, (vme_context->vfe_state.max_num_threads << 16)
+    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | 6); /*Gen6 CMD_MEDIA_STATE_POINTERS = CMD_MEDIA_STATE */
+    OUT_BATCH(batch, 0); /*Scratch Space Base Pointer and Space*/
+    OUT_BATCH(batch, (vme_context->vfe_state.max_num_threads << 16)
               | (vme_context->vfe_state.num_urb_entries << 8)
              | (vme_context->vfe_state.gpgpu_mode << 2) ); /*Maximum Number of Threads , Number of URB Entries, MEDIA Mode*/
-    OUT_BATCH(ctx, 0); /*Debug: Object ID*/
-    OUT_BATCH(ctx, (vme_context->vfe_state.urb_entry_size << 16)
+    OUT_BATCH(batch, 0); /*Debug: Object ID*/
+    OUT_BATCH(batch, (vme_context->vfe_state.urb_entry_size << 16)
              | vme_context->vfe_state.curbe_allocation_size); /*URB Entry Allocation Size , CURBE Allocation Size*/
-    OUT_BATCH(ctx, 0); /*Disable Scoreboard*/
-    OUT_BATCH(ctx, 0); /*Disable Scoreboard*/
-    OUT_BATCH(ctx, 0); /*Disable Scoreboard*/
+    OUT_BATCH(batch, 0); /*Disable Scoreboard*/
+    OUT_BATCH(batch, 0); /*Disable Scoreboard*/
+    OUT_BATCH(batch, 0); /*Disable Scoreboard*/
 
-    ADVANCE_BATCH(ctx);
+    ADVANCE_BATCH(batch);
 }
 
 static void gen6_vme_curbe_load(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
 
-    BEGIN_BATCH(ctx, 4);
+    BEGIN_BATCH(batch, 4);
 
-    OUT_BATCH(ctx, CMD_MEDIA_CURBE_LOAD | 2);
-    OUT_BATCH(ctx, 0);
+    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | 2);
+    OUT_BATCH(batch, 0);
 
-    OUT_BATCH(ctx, CURBE_TOTAL_DATA_LENGTH);
-    OUT_RELOC(ctx, vme_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_BATCH(batch, CURBE_TOTAL_DATA_LENGTH);
+    OUT_RELOC(batch, vme_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
 
-    ADVANCE_BATCH(ctx);
+    ADVANCE_BATCH(batch);
 }
 
 static void gen6_vme_idrt(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
 
-    BEGIN_BATCH(ctx, 4);
+    BEGIN_BATCH(batch, 4);
 
-    OUT_BATCH(ctx, CMD_MEDIA_INTERFACE_LOAD | 2);
-    OUT_BATCH(ctx, 0);
-    OUT_BATCH(ctx, GEN6_VME_KERNEL_NUMBER * sizeof(struct gen6_interface_descriptor_data));
-    OUT_RELOC(ctx, vme_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | 2);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, GEN6_VME_KERNEL_NUMBER * sizeof(struct gen6_interface_descriptor_data));
+    OUT_RELOC(batch, vme_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
 
-    ADVANCE_BATCH(ctx);
+    ADVANCE_BATCH(batch);
 }
 
 static int gen6_vme_media_object(VADriverContextP ctx,
@@ -522,23 +534,25 @@ static int gen6_vme_media_object(VADriverContextP ctx,
                                  int mb_x, int mb_y,
                                  int kernel)
 {
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_batchbuffer *batch = intel->batch;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
     int mb_width = ALIGN(obj_surface->orig_width, 16) / 16;
     int len_in_dowrds = 6 + 1;
 
-    BEGIN_BATCH(ctx, len_in_dowrds);
+    BEGIN_BATCH(batch, len_in_dowrds);
 
-    OUT_BATCH(ctx, CMD_MEDIA_OBJECT | (len_in_dowrds - 2));
-
OUT_BATCH(ctx, kernel); /*Interface Descriptor Offset*/ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (len_in_dowrds - 2)); + OUT_BATCH(batch, kernel); /*Interface Descriptor Offset*/ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /*inline data */ - OUT_BATCH(ctx, mb_width << 16 | mb_y << 8 | mb_x); /*M0.0 Refrence0 X,Y, not used in Intra*/ - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, mb_width << 16 | mb_y << 8 | mb_x); /*M0.0 Refrence0 X,Y, not used in Intra*/ + ADVANCE_BATCH(batch); return len_in_dowrds * 4; } @@ -603,6 +617,8 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx, struct encode_state *encode_state, struct gen6_encoder_context *gen6_encoder_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer; int is_intra = pSliceParameter->slice_flags.bits.is_intra; @@ -611,19 +627,19 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx, int emit_new_state = 1, object_len_in_bytes; int x, y; - intel_batchbuffer_start_atomic(ctx, 0x1000); + intel_batchbuffer_start_atomic(batch, 0x1000); for(y = 0; y < height_in_mbs; y++){ for(x = 0; x < width_in_mbs; x++){ if (emit_new_state) { /*Step1: MI_FLUSH/PIPE_CONTROL*/ - BEGIN_BATCH(ctx, 4); - OUT_BATCH(ctx, CMD_PIPE_CONTROL | 0x02); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - ADVANCE_BATCH(ctx); + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x02); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); /*Step2: State command PIPELINE_SELECT*/ gen6_vme_pipeline_select(ctx); @@ -640,16 +656,17 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx, /*Step4: Primitive commands*/ object_len_in_bytes = gen6_vme_media_object(ctx, encode_state, x, y, is_intra ? 
VME_INTRA_SHADER : VME_INTER_SHADER); - if (intel_batchbuffer_check_free_space(ctx, object_len_in_bytes) == 0) { - intel_batchbuffer_end_atomic(ctx); - intel_batchbuffer_flush(ctx); + if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) { + assert(0); + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); emit_new_state = 1; - intel_batchbuffer_start_atomic(ctx, 0x1000); + intel_batchbuffer_start_atomic(batch, 0x1000); } } } - intel_batchbuffer_end_atomic(ctx); + intel_batchbuffer_end_atomic(batch); } static VAStatus gen6_vme_prepare(VADriverContextP ctx, @@ -676,7 +693,10 @@ static VAStatus gen6_vme_run(VADriverContextP ctx, struct encode_state *encode_state, struct gen6_encoder_context *gen6_encoder_context) { - intel_batchbuffer_flush(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + intel_batchbuffer_flush(batch); return VA_STATUS_SUCCESS; } diff --git a/i965_avc_bsd.c b/i965_avc_bsd.c index 41fb8fc..dbda138 100644 --- a/i965_avc_bsd.c +++ b/i965_avc_bsd.c @@ -99,15 +99,18 @@ i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx, static void i965_bsd_ind_obj_base_address(VADriverContextP ctx, struct decode_state *decode_state, int slice) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + dri_bo *ind_bo = decode_state->slice_datas[slice]->bo; - BEGIN_BCS_BATCH(ctx, 3); - OUT_BCS_BATCH(ctx, CMD_BSD_IND_OBJ_BASE_ADDR | (3 - 2)); - OUT_BCS_RELOC(ctx, ind_bo, + BEGIN_BCS_BATCH(batch, 3); + OUT_BCS_BATCH(batch, CMD_BSD_IND_OBJ_BASE_ADDR | (3 - 2)); + OUT_BCS_RELOC(batch, ind_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BCS_BATCH(ctx, 0); - ADVANCE_BCS_BATCH(ctx); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); } static void @@ -115,6 +118,8 @@ i965_avc_bsd_img_state(VADriverContextP ctx, struct decode_state *decode_state, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; int qm_present_flag; int img_struct; int mbaff_frame_flag; @@ -167,14 +172,14 @@ i965_avc_bsd_img_state(VADriverContextP ctx, avc_it_command_header = (CMD_MEDIA_OBJECT_EX | (12 - 2)); - BEGIN_BCS_BATCH(ctx, 6); - OUT_BCS_BATCH(ctx, CMD_AVC_BSD_IMG_STATE | (6 - 2)); - OUT_BCS_BATCH(ctx, + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, CMD_AVC_BSD_IMG_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, ((width_in_mbs * height_in_mbs) & 0x7fff)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (height_in_mbs << 16) | (width_in_mbs << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) | ((pic_param->chroma_qp_index_offset & 0x1f) << 16) | (SCAN_RASTER_ORDER << 15) | /* AVC ILDB Data */ @@ -184,7 +189,7 @@ i965_avc_bsd_img_state(VADriverContextP ctx, (qm_present_flag << 10) | (img_struct << 8) | (16 << 0)); /* FIXME: always support 16 reference frames ??? 
*/ - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (RESIDUAL_DATA_OFFSET << 24) | /* residual data offset */ (0 << 17) | /* don't overwrite SRT */ (0 << 16) | /* Un-SRT (Unsynchronized Root Thread) */ @@ -199,13 +204,15 @@ i965_avc_bsd_img_state(VADriverContextP ctx, (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) | (mbaff_frame_flag << 1) | (pic_param->pic_fields.bits.field_pic_flag << 0)); - OUT_BCS_BATCH(ctx, avc_it_command_header); - ADVANCE_BCS_BATCH(ctx); + OUT_BCS_BATCH(batch, avc_it_command_header); + ADVANCE_BCS_BATCH(batch); } static void i965_avc_bsd_qm_state(VADriverContextP ctx, struct decode_state *decode_state) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; int cmd_len; VAIQMatrixBufferH264 *iq_matrix; VAPictureParameterBufferH264 *pic_param; @@ -223,24 +230,24 @@ i965_avc_bsd_qm_state(VADriverContextP ctx, struct decode_state *decode_state) if (pic_param->pic_fields.bits.transform_8x8_mode_flag) cmd_len += 2 * 16; /* load two 8x8 scaling matrices */ - BEGIN_BCS_BATCH(ctx, cmd_len); - OUT_BCS_BATCH(ctx, CMD_AVC_BSD_QM_STATE | (cmd_len - 2)); + BEGIN_BCS_BATCH(batch, cmd_len); + OUT_BCS_BATCH(batch, CMD_AVC_BSD_QM_STATE | (cmd_len - 2)); if (pic_param->pic_fields.bits.transform_8x8_mode_flag) - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (0x0 << 8) | /* don't use default built-in matrices */ (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */ else - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (0x0 << 8) | /* don't use default built-in matrices */ (0x3f << 0)); /* six 4x4 scaling matrices */ - intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4); + intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4); if (pic_param->pic_fields.bits.transform_8x8_mode_flag) - intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4); + intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4); - ADVANCE_BCS_BATCH(ctx); + ADVANCE_BCS_BATCH(batch); } static void @@ -249,6 +256,8 @@ i965_avc_bsd_slice_state(VADriverContextP ctx, VASliceParameterBufferH264 *slice_param, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; int present_flag, cmd_len, list, j; struct { unsigned char bottom_idc:1; @@ -288,9 +297,9 @@ i965_avc_bsd_slice_state(VADriverContextP ctx, cmd_len += 96; } - BEGIN_BCS_BATCH(ctx, cmd_len); - OUT_BCS_BATCH(ctx, CMD_AVC_BSD_SLICE_STATE | (cmd_len - 2)); - OUT_BCS_BATCH(ctx, present_flag); + BEGIN_BCS_BATCH(batch, cmd_len); + OUT_BCS_BATCH(batch, CMD_AVC_BSD_SLICE_STATE | (cmd_len - 2)); + OUT_BCS_BATCH(batch, present_flag); for (list = 0; list < 2; list++) { int flag; @@ -339,7 +348,7 @@ i965_avc_bsd_slice_state(VADriverContextP ctx, va_pic++; } - intel_batchbuffer_data_bcs(ctx, refs, sizeof(refs)); + intel_batchbuffer_data(batch, refs, sizeof(refs)); } i965_h264_context->weight128_luma_l0 = 0; @@ -378,7 +387,7 @@ i965_avc_bsd_slice_state(VADriverContextP ctx, } } - intel_batchbuffer_data_bcs(ctx, weightoffsets, sizeof(weightoffsets)); + intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets)); } if (present_flag & PRESENT_WEIGHT_OFFSET_L1) { @@ -407,10 +416,10 @@ i965_avc_bsd_slice_state(VADriverContextP ctx, } } - intel_batchbuffer_data_bcs(ctx, weightoffsets, sizeof(weightoffsets)); + intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets)); } - ADVANCE_BCS_BATCH(ctx); + ADVANCE_BCS_BATCH(batch); } static 
void @@ -419,6 +428,8 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, VASliceParameterBufferH264 *slice_param, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_avc_bsd_context *i965_avc_bsd_context; int i, j; @@ -428,27 +439,27 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context; - BEGIN_BCS_BATCH(ctx, 74); - OUT_BCS_BATCH(ctx, CMD_AVC_BSD_BUF_BASE_STATE | (74 - 2)); - OUT_BCS_RELOC(ctx, i965_avc_bsd_context->bsd_raw_store.bo, + BEGIN_BCS_BATCH(batch, 74); + OUT_BCS_BATCH(batch, CMD_AVC_BSD_BUF_BASE_STATE | (74 - 2)); + OUT_BCS_RELOC(batch, i965_avc_bsd_context->bsd_raw_store.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BCS_RELOC(ctx, i965_avc_bsd_context->mpr_row_store.bo, + OUT_BCS_RELOC(batch, i965_avc_bsd_context->mpr_row_store.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BCS_RELOC(ctx, i965_h264_context->avc_it_command_mb_info.bo, + OUT_BCS_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES); - OUT_BCS_RELOC(ctx, i965_h264_context->avc_it_data.bo, + OUT_BCS_RELOC(batch, i965_h264_context->avc_it_data.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, (i965_h264_context->avc_it_data.write_offset << 6)); if (i965_h264_context->enable_avc_ildb) - OUT_BCS_RELOC(ctx, i965_h264_context->avc_ildb_data.bo, + OUT_BCS_RELOC(batch, i965_h264_context->avc_ildb_data.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); else - OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(batch, 0); for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) { if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) { @@ -473,26 +484,26 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, avc_bsd_surface = obj_surface->private_data; if (avc_bsd_surface == NULL) { - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); } else { - OUT_BCS_RELOC(ctx, avc_bsd_surface->dmv_top, + OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); if (avc_bsd_surface->dmv_bottom_flag == 1) - OUT_BCS_RELOC(ctx, avc_bsd_surface->dmv_bottom, + OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); else - OUT_BCS_RELOC(ctx, avc_bsd_surface->dmv_top, + OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); } } } else { - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); } } @@ -512,16 +523,16 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, 0x1000); } - OUT_BCS_RELOC(ctx, avc_bsd_surface->dmv_top, + OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); if (avc_bsd_surface->dmv_bottom_flag == 1) - OUT_BCS_RELOC(ctx, avc_bsd_surface->dmv_bottom, + OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); else - OUT_BCS_RELOC(ctx, avc_bsd_surface->dmv_top, + OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); @@ -544,20 +555,20 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, assert(found == 1); 
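/*
 * Editor's sketch, not part of this patch: every OUT_BCS_RELOC(batch, bo,
 * read_domains, write_domain, delta) call site above presumably lands in a
 * helper of this shape inside intel_batchbuffer.c (whose hunks come later
 * in the patch).  The field names (buffer, map, ptr) and
 * intel_batchbuffer_emit_dword() are assumptions following the common
 * libdrm batchbuffer pattern, not text quoted from this patch.
 */
static void
sketch_emit_reloc(struct intel_batchbuffer *batch, dri_bo *bo,
                  uint32_t read_domains, uint32_t write_domain,
                  uint32_t delta)
{
    /* Record a relocation at the current write offset with libdrm... */
    dri_bo_emit_reloc(batch->buffer, read_domains, write_domain,
                      delta, batch->ptr - batch->map, bo);

    /* ...then emit the target's presumed address as the dword payload;
     * the kernel patches it at execbuffer time if the bo has moved. */
    intel_batchbuffer_emit_dword(batch, bo->offset + delta);
}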
if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) { - OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt); - OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } } else { - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); } } va_pic = &pic_param->CurrPic; - OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt); - OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); - ADVANCE_BCS_BATCH(ctx); + ADVANCE_BCS_BATCH(batch); } /* @@ -597,6 +608,8 @@ g4x_avc_bsd_object(VADriverContextP ctx, int slice_index, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ @@ -661,15 +674,15 @@ g4x_avc_bsd_object(VADriverContextP ctx, slice_hor_pos = first_mb_in_slice % width_in_mbs; slice_ver_pos = first_mb_in_slice / width_in_mbs; - BEGIN_BCS_BATCH(ctx, cmd_len); - OUT_BCS_BATCH(ctx, CMD_AVC_BSD_OBJECT | (cmd_len - 2)); - OUT_BCS_BATCH(ctx, + BEGIN_BCS_BATCH(batch, cmd_len); + OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (cmd_len - 2)); + OUT_BCS_BATCH(batch, (encrypted << 31) | ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (slice_param->slice_data_offset + (slice_data_bit_offset >> 3))); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */ (0 << 14) | /* ignore BSDPrematureComplete Error handling */ (0 << 13) | /* FIXME: ??? */ @@ -677,12 +690,12 @@ g4x_avc_bsd_object(VADriverContextP ctx, (0 << 10) | /* ignore Entropy Error handling */ (0 << 8) | /* ignore MB Header Error handling */ (slice_type << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (num_ref_idx_l1 << 24) | (num_ref_idx_l0 << 16) | (slice_param->chroma_log2_weight_denom << 8) | (slice_param->luma_log2_weight_denom << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (weighted_pred_idc << 30) | (slice_param->direct_spatial_mv_pred_flag << 29) | (slice_param->disable_deblocking_filter_idc << 27) | @@ -690,30 +703,30 @@ g4x_avc_bsd_object(VADriverContextP ctx, ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) | ((slice_param->slice_beta_offset_div2 & 0xf) << 8) | ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (slice_ver_pos << 24) | (slice_hor_pos << 16) | (first_mb_in_slice << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (0 << 7) | /* FIXME: ??? 
*/ ((0x7 - (slice_data_bit_offset & 0x7)) << 0)); if (encrypted) { - OUT_BCS_BATCH(ctx, counter_value); + OUT_BCS_BATCH(batch, counter_value); } - ADVANCE_BCS_BATCH(ctx); + ADVANCE_BCS_BATCH(batch); } else { - BEGIN_BCS_BATCH(ctx, 8); - OUT_BCS_BATCH(ctx, CMD_AVC_BSD_OBJECT | (8 - 2)); - OUT_BCS_BATCH(ctx, 0); /* indirect data length for phantom slice is 0 */ - OUT_BCS_BATCH(ctx, 0); /* indirect data start address for phantom slice is 0 */ - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag)); - OUT_BCS_BATCH(ctx, 0); - ADVANCE_BCS_BATCH(ctx); + BEGIN_BCS_BATCH(batch, 8); + OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (8 - 2)); + OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */ + OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag)); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); } } @@ -725,6 +738,8 @@ ironlake_avc_bsd_object(VADriverContextP ctx, int slice_index, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ @@ -788,17 +803,17 @@ ironlake_avc_bsd_object(VADriverContextP ctx, slice_hor_pos = first_mb_in_slice % width_in_mbs; slice_ver_pos = first_mb_in_slice / width_in_mbs; - BEGIN_BCS_BATCH(ctx, 16); - OUT_BCS_BATCH(ctx, CMD_AVC_BSD_OBJECT | (16 - 2)); - OUT_BCS_BATCH(ctx, + BEGIN_BCS_BATCH(batch, 16); + OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2)); + OUT_BCS_BATCH(batch, (encrypted << 31) | (0 << 30) | /* FIXME: packet based bit stream */ (0 << 29) | /* FIXME: packet format */ ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (slice_param->slice_data_offset + (slice_data_bit_offset >> 3))); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */ (0 << 14) | /* ignore BSDPrematureComplete Error handling */ (0 << 13) | /* FIXME: ??? */ @@ -806,12 +821,12 @@ ironlake_avc_bsd_object(VADriverContextP ctx, (0 << 10) | /* ignore Entropy Error handling */ (0 << 8) | /* ignore MB Header Error handling */ (slice_type << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (num_ref_idx_l1 << 24) | (num_ref_idx_l0 << 16) | (slice_param->chroma_log2_weight_denom << 8) | (slice_param->luma_log2_weight_denom << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (weighted_pred_idc << 30) | (slice_param->direct_spatial_mv_pred_flag << 29) | (slice_param->disable_deblocking_filter_idc << 27) | @@ -819,44 +834,44 @@ ironlake_avc_bsd_object(VADriverContextP ctx, ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) | ((slice_param->slice_beta_offset_div2 & 0xf) << 8) | ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (slice_ver_pos << 24) | (slice_hor_pos << 16) | (first_mb_in_slice << 0)); - OUT_BCS_BATCH(ctx, + OUT_BCS_BATCH(batch, (0 << 7) | /* FIXME: ??? 
*/ ((0x7 - (slice_data_bit_offset & 0x7)) << 0)); - OUT_BCS_BATCH(ctx, counter_value); + OUT_BCS_BATCH(batch, counter_value); /* FIXME: dw9-dw11 */ - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, i965_h264_context->weight128_luma_l0); - OUT_BCS_BATCH(ctx, i965_h264_context->weight128_luma_l1); - OUT_BCS_BATCH(ctx, i965_h264_context->weight128_chroma_l0); - OUT_BCS_BATCH(ctx, i965_h264_context->weight128_chroma_l1); - - ADVANCE_BCS_BATCH(ctx); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l0); + OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l1); + OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l0); + OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l1); + + ADVANCE_BCS_BATCH(batch); } else { - BEGIN_BCS_BATCH(ctx, 16); - OUT_BCS_BATCH(ctx, CMD_AVC_BSD_OBJECT | (16 - 2)); - OUT_BCS_BATCH(ctx, 0); /* indirect data length for phantom slice is 0 */ - OUT_BCS_BATCH(ctx, 0); /* indirect data start address for phantom slice is 0 */ - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag)); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - ADVANCE_BCS_BATCH(ctx); + BEGIN_BCS_BATCH(batch, 16); + OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2)); + OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */ + OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); } } @@ -1010,6 +1025,8 @@ i965_avc_bsd_frame_store_index(VADriverContextP ctx, void i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context; VAPictureParameterBufferH264 *pic_param; VASliceParameterBufferH264 *slice_param; @@ -1044,7 +1061,7 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, v } } - intel_batchbuffer_start_atomic_bcs(ctx, 0x1000); + intel_batchbuffer_start_atomic_bcs(batch, 0x1000); i965_avc_bsd_img_state(ctx, decode_state, i965_h264_context); i965_avc_bsd_qm_state(ctx, decode_state); @@ -1077,9 +1094,9 @@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, v } i965_avc_bsd_phantom_slice(ctx, decode_state, pic_param, i965_h264_context); - intel_batchbuffer_emit_mi_flush_bcs(ctx); - intel_batchbuffer_end_atomic_bcs(ctx); - intel_batchbuffer_flush_bcs(ctx); + intel_batchbuffer_emit_mi_flush(batch); + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); } void diff --git a/i965_avc_hw_scoreboard.c b/i965_avc_hw_scoreboard.c index 9e2263f..a5ec277 100644 --- 
a/i965_avc_hw_scoreboard.c +++ b/i965_avc_hw_scoreboard.c @@ -198,93 +198,113 @@ i965_avc_hw_scoreboard_states_setup(struct i965_avc_hw_scoreboard_context *avc_h static void i965_avc_hw_scoreboard_pipeline_select(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 1); - OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + ADVANCE_BATCH(batch); } static void i965_avc_hw_scoreboard_urb_layout(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); unsigned int vfe_fence, cs_fence; vfe_fence = avc_hw_scoreboard_context->urb.cs_start; cs_fence = URB_SIZE((&i965->intel)); - BEGIN_BATCH(ctx, 3); - OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, (vfe_fence << UF2_VFE_FENCE_SHIFT) | /* VFE_SIZE */ (cs_fence << UF2_CS_FENCE_SHIFT)); /* CS_SIZE */ - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_avc_hw_scoreboard_state_base_address(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 8); - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); } static void i965_avc_hw_scoreboard_state_pointers(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context) { - BEGIN_BATCH(ctx, 3); - OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1); - OUT_BATCH(ctx, 0); - OUT_RELOC(ctx, avc_hw_scoreboard_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1); + OUT_BATCH(batch, 0); + OUT_RELOC(batch, avc_hw_scoreboard_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(batch); } static void i965_avc_hw_scoreboard_cs_urb_layout(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context) { - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CS_URB_STATE | 0); - OUT_BATCH(ctx, + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_CS_URB_STATE | 0); + OUT_BATCH(batch, 
((avc_hw_scoreboard_context->urb.size_cs_entry - 1) << 4) | /* URB Entry Allocation Size */ (avc_hw_scoreboard_context->urb.num_cs_entries << 0)); /* Number of URB Entries */ - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_avc_hw_scoreboard_constant_buffer(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context) { - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); - OUT_RELOC(ctx, avc_hw_scoreboard_context->curbe.bo, + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); + OUT_RELOC(batch, avc_hw_scoreboard_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, avc_hw_scoreboard_context->urb.size_cs_entry - 1); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_avc_hw_scoreboard_objects(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + int number_mb_cmds = 512; int starting_mb_number = avc_hw_scoreboard_context->inline_data.starting_mb_number; int i; for (i = 0; i < avc_hw_scoreboard_context->inline_data.num_mb_cmds / 512; i++) { - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 4); - OUT_BATCH(ctx, 0); /* interface descriptor offset: 0 */ - OUT_BATCH(ctx, 0); /* no indirect data */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, ((number_mb_cmds << 16) | + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | 4); + OUT_BATCH(batch, 0); /* interface descriptor offset: 0 */ + OUT_BATCH(batch, 0); /* no indirect data */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, ((number_mb_cmds << 16) | (starting_mb_number << 0))); - OUT_BATCH(ctx, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs); + ADVANCE_BATCH(batch); starting_mb_number += 512; } @@ -292,23 +312,26 @@ i965_avc_hw_scoreboard_objects(VADriverContextP ctx, struct i965_avc_hw_scoreboa number_mb_cmds = avc_hw_scoreboard_context->inline_data.num_mb_cmds % 512; if (number_mb_cmds) { - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 4); - OUT_BATCH(ctx, 0); /* interface descriptor offset: 0 */ - OUT_BATCH(ctx, 0); /* no indirect data */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, ((number_mb_cmds << 16) | + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | 4); + OUT_BATCH(batch, 0); /* interface descriptor offset: 0 */ + OUT_BATCH(batch, 0); /* no indirect data */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, ((number_mb_cmds << 16) | (starting_mb_number << 0))); - OUT_BATCH(ctx, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs); + ADVANCE_BATCH(batch); } } static void i965_avc_hw_scoreboard_pipeline_setup(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context) { - intel_batchbuffer_start_atomic(ctx, 0x1000); - intel_batchbuffer_emit_mi_flush(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); i965_avc_hw_scoreboard_pipeline_select(ctx); i965_avc_hw_scoreboard_state_base_address(ctx); i965_avc_hw_scoreboard_state_pointers(ctx, avc_hw_scoreboard_context); @@ 
-316,7 +339,7 @@ i965_avc_hw_scoreboard_pipeline_setup(VADriverContextP ctx, struct i965_avc_hw_s i965_avc_hw_scoreboard_cs_urb_layout(ctx, avc_hw_scoreboard_context); i965_avc_hw_scoreboard_constant_buffer(ctx, avc_hw_scoreboard_context); i965_avc_hw_scoreboard_objects(ctx, avc_hw_scoreboard_context); - intel_batchbuffer_end_atomic(ctx); + intel_batchbuffer_end_atomic(batch); } void diff --git a/i965_avc_ildb.c b/i965_avc_ildb.c index 739e0cf..1fb72a4 100644 --- a/i965_avc_ildb.c +++ b/i965_avc_ildb.c @@ -398,14 +398,19 @@ i965_avc_ildb_states_setup(VADriverContextP ctx, static void i965_avc_ildb_pipeline_select(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 1); - OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + ADVANCE_BATCH(batch); } static void i965_avc_ildb_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context; @@ -414,119 +419,132 @@ i965_avc_ildb_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h2 vfe_fence = avc_ildb_context->urb.cs_start; cs_fence = URB_SIZE((&i965->intel)); - BEGIN_BATCH(ctx, 3); - OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, (vfe_fence << UF2_VFE_FENCE_SHIFT) | /* VFE_SIZE */ (cs_fence << UF2_CS_FENCE_SHIFT)); /* CS_SIZE */ - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_avc_ildb_state_base_address(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); if (IS_IRONLAKE(i965->intel.device_id)) { - BEGIN_BATCH(ctx, 8); - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(ctx); + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); } else { - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(ctx); + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + 
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); } } static void i965_avc_ildb_state_pointers(VADriverContextP ctx, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context; - BEGIN_BATCH(ctx, 3); - OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1); - OUT_BATCH(ctx, 0); - OUT_RELOC(ctx, avc_ildb_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - ADVANCE_BATCH(ctx); + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1); + OUT_BATCH(batch, 0); + OUT_RELOC(batch, avc_ildb_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(batch); } static void i965_avc_ildb_cs_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context; - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CS_URB_STATE | 0); - OUT_BATCH(ctx, + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_CS_URB_STATE | 0); + OUT_BATCH(batch, ((avc_ildb_context->urb.size_cs_entry - 1) << 4) | /* URB Entry Allocation Size */ (avc_ildb_context->urb.num_cs_entries << 0)); /* Number of URB Entries */ - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_avc_ildb_constant_buffer(VADriverContextP ctx, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context; - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); - OUT_RELOC(ctx, avc_ildb_context->curbe.bo, + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); + OUT_RELOC(batch, avc_ildb_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, avc_ildb_context->urb.size_cs_entry - 1); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_avc_ildb_objects(VADriverContextP ctx, struct i965_h264_context *i965_h264_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context; - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 4); + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | 4); switch (avc_ildb_context->picture_type) { case PICTURE_FRAME: - OUT_BATCH(ctx, AVC_ILDB_ROOT_Y_ILDB_FRAME); + OUT_BATCH(batch, AVC_ILDB_ROOT_Y_ILDB_FRAME); break; case PICTURE_FIELD: - OUT_BATCH(ctx, AVC_ILDB_ROOT_Y_ILDB_FIELD); + OUT_BATCH(batch, AVC_ILDB_ROOT_Y_ILDB_FIELD); break; case PICTURE_MBAFF: - OUT_BATCH(ctx, AVC_ILDB_ROOT_Y_ILDB_MBAFF); + OUT_BATCH(batch, AVC_ILDB_ROOT_Y_ILDB_MBAFF); break; default: assert(0); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, 0); break; } - OUT_BATCH(ctx, 0); /* no indirect data */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, 0); /* no indirect data */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); } static void i965_avc_ildb_pipeline_setup(VADriverContextP ctx, struct i965_h264_context *i965_h264_context) { - intel_batchbuffer_emit_mi_flush(ctx); + struct 
intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + intel_batchbuffer_emit_mi_flush(batch); i965_avc_ildb_pipeline_select(ctx); i965_avc_ildb_state_base_address(ctx); i965_avc_ildb_state_pointers(ctx, i965_h264_context); diff --git a/i965_defines.h b/i965_defines.h index dfbae3f..66c3133 100644 --- a/i965_defines.h +++ b/i965_defines.h @@ -57,8 +57,6 @@ /* DW1 */ # define CMD_CLEAR_PARAMS_DEPTH_CLEAR_VALID (1 << 15) -#define CMD_PIPE_CONTROL CMD(3, 2, 0) - /* for GEN6+ */ #define GEN6_3DSTATE_SAMPLER_STATE_POINTERS CMD(3, 0, 0x02) # define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12) diff --git a/i965_drv_video.c b/i965_drv_video.c index 8f353f1..493abca 100644 --- a/i965_drv_video.c +++ b/i965_drv_video.c @@ -1550,13 +1550,15 @@ i965_QuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfaceStatus *status) /* out */ { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_surface *obj_surface = SURFACE(render_target); assert(obj_surface); /* Commit pending operations to the HW */ - intel_batchbuffer_flush(ctx); + intel_batchbuffer_flush(batch); /* Usually GEM will handle synchronization with the graphics hardware */ #if 0 @@ -2134,6 +2136,8 @@ i965_GetImage(VADriverContextP ctx, unsigned int height, VAImageID image) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; @@ -2155,7 +2159,7 @@ i965_GetImage(VADriverContextP ctx, return VA_STATUS_ERROR_INVALID_PARAMETER; /* Commit pending operations to the HW */ - intel_batchbuffer_flush(ctx); + intel_batchbuffer_flush(batch); VAStatus va_status; void *image_data = NULL; diff --git a/i965_media.c b/i965_media.c index 8745488..322b440 100644 --- a/i965_media.c +++ b/i965_media.c @@ -46,95 +46,109 @@ static void i965_media_pipeline_select(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 1); - OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + ADVANCE_BATCH(batch); } static void i965_media_urb_layout(VADriverContextP ctx, struct i965_media_context *media_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + struct i965_driver_data *i965 = i965_driver_data(ctx); unsigned int vfe_fence, cs_fence; vfe_fence = media_context->urb.cs_start; cs_fence = URB_SIZE((&i965->intel)); - BEGIN_BATCH(ctx, 3); - OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, (vfe_fence << UF2_VFE_FENCE_SHIFT) | /* VFE_SIZE */ (cs_fence << UF2_CS_FENCE_SHIFT)); /* CS_SIZE */ - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_media_state_base_address(VADriverContextP ctx, struct i965_media_context *media_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); if 
(IS_IRONLAKE(i965->intel.device_id)) { - BEGIN_BATCH(ctx, 8); - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); if (media_context->indirect_object.bo) { - OUT_RELOC(ctx, media_context->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + OUT_RELOC(batch, media_context->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, media_context->indirect_object.offset | BASE_ADDRESS_MODIFY); } else { - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); } - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); } else { - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); if (media_context->indirect_object.bo) { - OUT_RELOC(ctx, media_context->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + OUT_RELOC(batch, media_context->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, media_context->indirect_object.offset | BASE_ADDRESS_MODIFY); } else { - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); } - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); } } static void i965_media_state_pointers(VADriverContextP ctx, struct i965_media_context *media_context) { - BEGIN_BATCH(ctx, 3); - OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1); if (media_context->extended_state.enabled) - OUT_RELOC(ctx, media_context->extended_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(batch, media_context->extended_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); else - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, 0); - OUT_RELOC(ctx, media_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - ADVANCE_BATCH(ctx); + OUT_RELOC(batch, media_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(batch); } static void i965_media_cs_urb_layout(VADriverContextP ctx, struct i965_media_context *media_context) { - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CS_URB_STATE | 0); - OUT_BATCH(ctx, + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_CS_URB_STATE | 0); + OUT_BATCH(batch, ((media_context->urb.size_cs_entry - 1) << 4) | /* URB Entry Allocation Size */ (media_context->urb.num_cs_entries << 0)); /* Number of URB Entries */ - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void @@ -148,26 +162,32 @@ i965_media_pipeline_state(VADriverContextP 
ctx, struct i965_media_context *media static void i965_media_constant_buffer(VADriverContextP ctx, struct decode_state *decode_state, struct i965_media_context *media_context) { - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); - OUT_RELOC(ctx, media_context->curbe.bo, + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); + OUT_RELOC(batch, media_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, media_context->urb.size_cs_entry - 1); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_media_depth_buffer(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, CMD_DEPTH_BUFFER | 4); - OUT_BATCH(ctx, (I965_DEPTHFORMAT_D32_FLOAT << 18) | + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_DEPTH_BUFFER | 4); + OUT_BATCH(batch, (I965_DEPTHFORMAT_D32_FLOAT << 18) | (I965_SURFACE_NULL << 29)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); } static void @@ -175,8 +195,11 @@ i965_media_pipeline_setup(VADriverContextP ctx, struct decode_state *decode_state, struct i965_media_context *media_context) { - intel_batchbuffer_start_atomic(ctx, 0x1000); - intel_batchbuffer_emit_mi_flush(ctx); /* step 1 */ + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); /* step 1 */ i965_media_depth_buffer(ctx); i965_media_pipeline_select(ctx); /* step 2 */ i965_media_urb_layout(ctx, media_context); /* step 3 */ @@ -184,7 +207,7 @@ i965_media_pipeline_setup(VADriverContextP ctx, i965_media_constant_buffer(ctx, decode_state, media_context); /* step 5 */ assert(media_context->media_objects); media_context->media_objects(ctx, decode_state, media_context); /* step 6 */ - intel_batchbuffer_end_atomic(ctx); + intel_batchbuffer_end_atomic(batch); } static void diff --git a/i965_media_h264.c b/i965_media_h264.c index 2298361..feda871 100644 --- a/i965_media_h264.c +++ b/i965_media_h264.c @@ -710,6 +710,8 @@ i965_media_h264_objects(VADriverContextP ctx, struct decode_state *decode_state, struct i965_media_context *media_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_h264_context *i965_h264_context; unsigned int *object_command; @@ -725,19 +727,19 @@ i965_media_h264_objects(VADriverContextP ctx, *object_command = MI_BATCH_BUFFER_END; dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo); - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, MI_BATCH_BUFFER_START | (2 << 6)); - OUT_RELOC(ctx, i965_h264_context->avc_it_command_mb_info.bo, + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo, I915_GEM_DOMAIN_COMMAND, 0, 0); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END * will cause control to pass back to ring buffer */ - intel_batchbuffer_end_atomic(ctx); - intel_batchbuffer_flush(ctx); - intel_batchbuffer_start_atomic(ctx, 0x1000); + intel_batchbuffer_end_atomic(batch); + 
intel_batchbuffer_flush(batch); + intel_batchbuffer_start_atomic(batch, 0x1000); i965_avc_ildb(ctx, decode_state, i965_h264_context); } diff --git a/i965_media_mpeg2.c b/i965_media_mpeg2.c index 406ace4..a276552 100644 --- a/i965_media_mpeg2.c +++ b/i965_media_mpeg2.c @@ -882,6 +882,8 @@ i965_media_mpeg2_objects(VADriverContextP ctx, struct decode_state *decode_state, struct i965_media_context *media_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; int i, j; VASliceParameterBufferMPEG2 *slice_param; @@ -893,20 +895,20 @@ i965_media_mpeg2_objects(VADriverContextP ctx, for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 4); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, slice_param->slice_data_size - (slice_param->macroblock_offset >> 3)); - OUT_RELOC(ctx, decode_state->slice_datas[j]->bo, + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | 4); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, slice_param->slice_data_size - (slice_param->macroblock_offset >> 3)); + OUT_RELOC(batch, decode_state->slice_datas[j]->bo, I915_GEM_DOMAIN_SAMPLER, 0, slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3)); - OUT_BATCH(ctx, + OUT_BATCH(batch, ((slice_param->slice_horizontal_position << 24) | (slice_param->slice_vertical_position << 16) | (127 << 8) | (slice_param->macroblock_offset & 0x7))); - OUT_BATCH(ctx, slice_param->quantiser_scale_code << 24); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, slice_param->quantiser_scale_code << 24); + ADVANCE_BATCH(batch); slice_param++; } } diff --git a/i965_post_processing.c b/i965_post_processing.c index 1b3e724..ba3a69f 100644 --- a/i965_post_processing.c +++ b/i965_post_processing.c @@ -399,78 +399,97 @@ ironlake_pp_states_setup(VADriverContextP ctx) static void ironlake_pp_pipeline_select(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 1); - OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + ADVANCE_BATCH(batch); } static void ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; unsigned int vfe_fence, cs_fence; vfe_fence = pp_context->urb.cs_start; cs_fence = pp_context->urb.size; - BEGIN_BATCH(ctx, 3); - OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, (vfe_fence << UF2_VFE_FENCE_SHIFT) | /* VFE_SIZE */ (cs_fence << UF2_CS_FENCE_SHIFT)); /* CS_SIZE */ - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void ironlake_pp_state_base_address(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 8); - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(ctx); + struct 
intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); } static void ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { - BEGIN_BATCH(ctx, 3); - OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1); - OUT_BATCH(ctx, 0); - OUT_RELOC(ctx, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1); + OUT_BATCH(batch, 0); + OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(batch); } static void ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CS_URB_STATE | 0); - OUT_BATCH(ctx, + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_CS_URB_STATE | 0); + OUT_BATCH(batch, ((pp_context->urb.size_cs_entry - 1) << 4) | /* URB Entry Allocation Size */ (pp_context->urb.num_cs_entries << 0)); /* Number of URB Entries */ - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); - OUT_RELOC(ctx, pp_context->curbe.bo, + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); + OUT_RELOC(batch, pp_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, pp_context->urb.size_cs_entry - 1); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; int x, x_steps, y, y_steps; x_steps = pp_context->pp_x_steps(&pp_context->private_context); @@ -479,17 +498,17 @@ ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_cont for (y = 0; y < y_steps; y++) { for (x = 0; x < x_steps; x++) { if (!pp_context->pp_set_block_parameter(pp_context, x, y)) { - BEGIN_BATCH(ctx, 20); - OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 18); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); /* no indirect data */ - OUT_BATCH(ctx, 0); + BEGIN_BATCH(batch, 20); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* no indirect data */ + OUT_BATCH(batch, 0); /* inline data grf 5-6 */ assert(sizeof(pp_inline_parameter) == 64); - intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter)); + intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter)); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } } } @@ -498,11 +517,13 @@ ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_cont 
static void ironlake_pp_pipeline_setup(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_post_processing_context *pp_context = i965->pp_context; - intel_batchbuffer_start_atomic(ctx, 0x1000); - intel_batchbuffer_emit_mi_flush(ctx); + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); ironlake_pp_pipeline_select(ctx); ironlake_pp_state_base_address(ctx); ironlake_pp_state_pointers(ctx, pp_context); @@ -510,7 +531,7 @@ ironlake_pp_pipeline_setup(VADriverContextP ctx) ironlake_pp_cs_urb_layout(ctx, pp_context); ironlake_pp_constant_buffer(ctx, pp_context); ironlake_pp_object_walker(ctx, pp_context); - intel_batchbuffer_end_atomic(ctx); + intel_batchbuffer_end_atomic(batch); } static int @@ -2047,82 +2068,99 @@ gen6_pp_states_setup(VADriverContextP ctx) static void gen6_pp_pipeline_select(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 1); - OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + ADVANCE_BATCH(batch); } static void gen6_pp_state_base_address(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 10); - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2)); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 10); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2)); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); } static void gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { - BEGIN_BATCH(ctx, 8); - OUT_BATCH(ctx, CMD_MEDIA_VFE_STATE | (8 - 2)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, (pp_context->urb.num_vfe_entries - 1) << 16 | pp_context->urb.num_vfe_entries << 8); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, + OUT_BATCH(batch, 0); + OUT_BATCH(batch, (pp_context->urb.size_vfe_entry * 2) << 16 | /* in 256 bits unit */ (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1)); /* in 256 bits unit */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); } static void gen6_pp_curbe_load(VADriverContextP ctx, struct 
i965_post_processing_context *pp_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size); - BEGIN_BATCH(ctx, 4); - OUT_BATCH(ctx, CMD_MEDIA_CURBE_LOAD | (4 - 2)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512); - OUT_RELOC(ctx, + OUT_RELOC(batch, pp_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { - BEGIN_BATCH(ctx, 4); - OUT_BATCH(ctx, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data)); - OUT_RELOC(ctx, + OUT_RELOC(batch, pp_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; int x, x_steps, y, y_steps; x_steps = pp_context->pp_x_steps(&pp_context->private_context); @@ -2131,19 +2169,19 @@ gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context for (y = 0; y < y_steps; y++) { for (x = 0; x < x_steps; x++) { if (!pp_context->pp_set_block_parameter(pp_context, x, y)) { - BEGIN_BATCH(ctx, 22); - OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 20); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); /* no indirect data */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); /* scoreboard */ - OUT_BATCH(ctx, 0); + BEGIN_BATCH(batch, 22); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* no indirect data */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* scoreboard */ + OUT_BATCH(batch, 0); /* inline data grf 5-6 */ assert(sizeof(pp_inline_parameter) == 64); - intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter)); + intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter)); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } } } @@ -2152,18 +2190,20 @@ gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context static void gen6_pp_pipeline_setup(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_post_processing_context *pp_context = i965->pp_context; - intel_batchbuffer_start_atomic(ctx, 0x1000); - intel_batchbuffer_emit_mi_flush(ctx); + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); gen6_pp_pipeline_select(ctx); gen6_pp_curbe_load(ctx, pp_context); gen6_interface_descriptor_load(ctx, pp_context); gen6_pp_state_base_address(ctx); gen6_pp_vfe_state(ctx, pp_context); gen6_pp_object_walker(ctx, pp_context); - intel_batchbuffer_end_atomic(ctx); + 
intel_batchbuffer_end_atomic(batch); } static void diff --git a/i965_render.c b/i965_render.c index c04b72c..ff7483e 100644 --- a/i965_render.c +++ b/i965_render.c @@ -968,94 +968,112 @@ i965_subpic_render_state_setup(VADriverContextP ctx, static void i965_render_pipeline_select(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 1); - OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); + ADVANCE_BATCH(batch); } static void i965_render_state_sip(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_STATE_SIP | 0); - OUT_BATCH(ctx, 0); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_STATE_SIP | 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); } static void i965_render_state_base_address(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; if (IS_IRONLAKE(i965->intel.device_id)) { - BEGIN_BATCH(ctx, 8); - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(ctx); + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); } else { - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); - ADVANCE_BATCH(ctx); + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); } } static void i965_render_binding_table_pointers(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | 4); - OUT_BATCH(ctx, 0); /* vs */ - OUT_BATCH(ctx, 0); /* gs */ - OUT_BATCH(ctx, 0); /* clip */ - OUT_BATCH(ctx, 0); /* sf */ - OUT_BATCH(ctx, BINDING_TABLE_OFFSET); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + 
BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4); + OUT_BATCH(batch, 0); /* vs */ + OUT_BATCH(batch, 0); /* gs */ + OUT_BATCH(batch, 0); /* clip */ + OUT_BATCH(batch, 0); /* sf */ + OUT_BATCH(batch, BINDING_TABLE_OFFSET); + ADVANCE_BATCH(batch); } static void i965_render_constant_color(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 5); - OUT_BATCH(ctx, CMD_CONSTANT_COLOR | 3); - OUT_BATCH(ctx, float_to_uint(1.0)); - OUT_BATCH(ctx, float_to_uint(0.0)); - OUT_BATCH(ctx, float_to_uint(1.0)); - OUT_BATCH(ctx, float_to_uint(1.0)); - ADVANCE_BATCH(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3); + OUT_BATCH(batch, float_to_uint(1.0)); + OUT_BATCH(batch, float_to_uint(0.0)); + OUT_BATCH(batch, float_to_uint(1.0)); + OUT_BATCH(batch, float_to_uint(1.0)); + ADVANCE_BATCH(batch); } static void i965_render_pipelined_pointers(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - BEGIN_BATCH(ctx, 7); - OUT_BATCH(ctx, CMD_PIPELINED_POINTERS | 5); - OUT_RELOC(ctx, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BATCH(ctx, 0); /* disable GS */ - OUT_BATCH(ctx, 0); /* disable CLIP */ - OUT_RELOC(ctx, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(ctx, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - ADVANCE_BATCH(ctx); + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5); + OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(batch, 0); /* disable GS */ + OUT_BATCH(batch, 0); /* disable CLIP */ + OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(batch); } static void i965_render_urb_layout(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; int urb_vs_start, urb_vs_size; int urb_gs_start, urb_gs_size; int urb_clip_start, urb_clip_size; @@ -1073,8 +1091,8 @@ i965_render_urb_layout(VADriverContextP ctx) urb_cs_start = urb_sf_start + urb_sf_size; urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; - BEGIN_BATCH(ctx, 3); - OUT_BATCH(ctx, + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_URB_FENCE | UF0_CS_REALLOC | UF0_SF_REALLOC | @@ -1082,107 +1100,116 @@ i965_render_urb_layout(VADriverContextP ctx) UF0_GS_REALLOC | UF0_VS_REALLOC | 1); - OUT_BATCH(ctx, + OUT_BATCH(batch, ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); - OUT_BATCH(ctx, + OUT_BATCH(batch, ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_render_cs_urb_layout(VADriverContextP ctx) { - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CS_URB_STATE | 0); - OUT_BATCH(ctx, + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + BEGIN_BATCH(batch, 
2); + OUT_BATCH(batch, CMD_CS_URB_STATE | 0); + OUT_BATCH(batch, ((URB_CS_ENTRY_SIZE - 1) << 4) | /* URB Entry Allocation Size */ (URB_CS_ENTRIES << 0)); /* Number of URB Entries */ - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_render_constant_buffer(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - BEGIN_BATCH(ctx, 2); - OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); - OUT_RELOC(ctx, render_state->curbe.bo, + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); + OUT_RELOC(batch, render_state->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, URB_CS_ENTRY_SIZE - 1); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } static void i965_render_drawing_rectangle(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; struct intel_region *dest_region = render_state->draw_region; - BEGIN_BATCH(ctx, 4); - OUT_BATCH(ctx, CMD_DRAWING_RECTANGLE | 2); - OUT_BATCH(ctx, 0x00000000); - OUT_BATCH(ctx, (dest_region->width - 1) | (dest_region->height - 1) << 16); - OUT_BATCH(ctx, 0x00000000); - ADVANCE_BATCH(ctx); + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2); + OUT_BATCH(batch, 0x00000000); + OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16); + OUT_BATCH(batch, 0x00000000); + ADVANCE_BATCH(batch); } static void i965_render_vertex_elements(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); if (IS_IRONLAKE(i965->intel.device_id)) { - BEGIN_BATCH(ctx, 5); - OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3); + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3); /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ - OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | (0 << VE0_OFFSET_SHIFT)); - OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ - OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | (8 << VE0_OFFSET_SHIFT)); - OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } else { - BEGIN_BATCH(ctx, 5); - OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3); + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3); /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ - OUT_BATCH(ctx, (0 << 
VE0_VERTEX_BUFFER_INDEX_SHIFT) | + OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | (0 << VE0_OFFSET_SHIFT)); - OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ - OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | (8 << VE0_OFFSET_SHIFT)); - OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); - ADVANCE_BATCH(ctx); + ADVANCE_BATCH(batch); } } @@ -1193,6 +1220,8 @@ i965_render_upload_image_palette( unsigned int alpha ) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); unsigned int i; @@ -1202,53 +1231,57 @@ i965_render_upload_image_palette( if (obj_image->image.num_palette_entries == 0) return; - BEGIN_BATCH(ctx, 1 + obj_image->image.num_palette_entries); - OUT_BATCH(ctx, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1)); + BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries); + OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1)); /*fill palette*/ //int32_t out[16]; //0-23:color 23-31:alpha for (i = 0; i < obj_image->image.num_palette_entries; i++) - OUT_BATCH(ctx, (alpha << 24) | obj_image->palette[i]); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]); + ADVANCE_BATCH(batch); } static void i965_render_startup(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - BEGIN_BATCH(ctx, 11); - OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3); - OUT_BATCH(ctx, + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3); + OUT_BATCH(batch, (0 << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); - OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); if (IS_IRONLAKE(i965->intel.device_id)) - OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); + OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); else - OUT_BATCH(ctx, 3); + OUT_BATCH(batch, 3); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, 0); - OUT_BATCH(ctx, + OUT_BATCH(batch, CMD_3DPRIMITIVE | _3DPRIMITIVE_VERTEX_SEQUENTIAL | (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) | (0 << 9) | 4); - OUT_BATCH(ctx, 3); /* vertex count per instance */ - OUT_BATCH(ctx, 0); /* start vertex offset */ - OUT_BATCH(ctx, 1); /* single instance */ - OUT_BATCH(ctx, 0); /* start instance 
location */ - OUT_BATCH(ctx, 0); /* index buffer offset, ignored */ - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, 3); /* vertex count per instance */ + OUT_BATCH(batch, 0); /* start vertex offset */ + OUT_BATCH(batch, 1); /* single instance */ + OUT_BATCH(batch, 0); /* start instance location */ + OUT_BATCH(batch, 0); /* index buffer offset, ignored */ + ADVANCE_BATCH(batch); } static void i965_clear_dest_region(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; struct intel_region *dest_region = render_state->draw_region; @@ -1274,28 +1307,36 @@ i965_clear_dest_region(VADriverContextP ctx) br13 |= pitch; - if (IS_GEN6(i965->intel.device_id)) - BEGIN_BLT_BATCH(ctx, 6); - else - BEGIN_BATCH(ctx, 6); - OUT_BATCH(ctx, blt_cmd); - OUT_BATCH(ctx, br13); - OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x)); - OUT_BATCH(ctx, ((dest_region->y + dest_region->height) << 16) | + if (IS_GEN6(i965->intel.device_id)) { + intel_batchbuffer_start_atomic_blt(batch, 24); + BEGIN_BLT_BATCH(batch, 6); + } else { + intel_batchbuffer_start_atomic(batch, 24); + BEGIN_BATCH(batch, 6); + } + + OUT_BATCH(batch, blt_cmd); + OUT_BATCH(batch, br13); + OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x)); + OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) | (dest_region->x + dest_region->width)); - OUT_RELOC(ctx, dest_region->bo, + OUT_RELOC(batch, dest_region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - OUT_BATCH(ctx, 0x0); - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, 0x0); + ADVANCE_BATCH(batch); + intel_batchbuffer_end_atomic(batch); } static void i965_surface_render_pipeline_setup(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + i965_clear_dest_region(ctx); - intel_batchbuffer_start_atomic(ctx, 0x1000); - intel_batchbuffer_emit_mi_flush(ctx); + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); i965_render_pipeline_select(ctx); i965_render_state_sip(ctx); i965_render_state_base_address(ctx); @@ -1308,14 +1349,17 @@ i965_surface_render_pipeline_setup(VADriverContextP ctx) i965_render_drawing_rectangle(ctx); i965_render_vertex_elements(ctx); i965_render_startup(ctx); - intel_batchbuffer_end_atomic(ctx); + intel_batchbuffer_end_atomic(batch); } static void i965_subpic_render_pipeline_setup(VADriverContextP ctx) { - intel_batchbuffer_start_atomic(ctx, 0x1000); - intel_batchbuffer_emit_mi_flush(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); i965_render_pipeline_select(ctx); i965_render_state_sip(ctx); i965_render_state_base_address(ctx); @@ -1327,7 +1371,7 @@ i965_subpic_render_pipeline_setup(VADriverContextP ctx) i965_render_drawing_rectangle(ctx); i965_render_vertex_elements(ctx); i965_render_startup(ctx); - intel_batchbuffer_end_atomic(ctx); + intel_batchbuffer_end_atomic(batch); } @@ -1424,12 +1468,15 @@ i965_render_put_surface(VADriverContextP ctx, unsigned short desth, unsigned int flag) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + i965_render_initialize(ctx); i965_surface_render_state_setup(ctx, surface, srcx, srcy, srcw, srch, destx, 
desty, destw, desth); i965_surface_render_pipeline_setup(ctx); - intel_batchbuffer_flush(ctx); + intel_batchbuffer_flush(batch); } static void @@ -1444,9 +1491,12 @@ i965_render_put_subpicture(VADriverContextP ctx, unsigned short destw, unsigned short desth) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_surface *obj_surface = SURFACE(surface); struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + assert(obj_subpic); i965_render_initialize(ctx); @@ -1455,7 +1505,7 @@ i965_render_put_subpicture(VADriverContextP ctx, destx, desty, destw, desth); i965_subpic_render_pipeline_setup(ctx); i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); - intel_batchbuffer_flush(ctx); + intel_batchbuffer_flush(batch); } /* @@ -1608,116 +1658,136 @@ gen6_render_setup_states(VADriverContextP ctx, static void gen6_emit_invarient_states(VADriverContextP ctx) { - OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); - OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); - OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); + OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, 0); - OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); - OUT_BATCH(ctx, 1); + OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(batch, 1); /* Set system instruction pointer */ - OUT_BATCH(ctx, CMD_STATE_SIP | 0); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, CMD_STATE_SIP | 0); + OUT_BATCH(batch, 0); } static void gen6_emit_state_base_address(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2)); - OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state base address */ - OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ - OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state base address */ - OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object base address */ - OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction base address */ - OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state upper bound */ - OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ - OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ - OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2)); + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */ + OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper 
bound */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ } static void gen6_emit_viewport_state_pointers(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - OUT_BATCH(ctx, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | + OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | (4 - 2)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_RELOC(ctx, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); } static void gen6_emit_urb(VADriverContextP ctx) { - OUT_BATCH(ctx, GEN6_3DSTATE_URB | (3 - 2)); - OUT_BATCH(ctx, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2)); + OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ - OUT_BATCH(ctx, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | + OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ } static void gen6_emit_cc_state_pointers(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - OUT_BATCH(ctx, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); - OUT_RELOC(ctx, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); - OUT_RELOC(ctx, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); - OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); + OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); } static void gen6_emit_sampler_state_pointers(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLER_STATE_POINTERS | + OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS | GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | (4 - 2)); - OUT_BATCH(ctx, 0); /* VS */ - OUT_BATCH(ctx, 0); /* GS */ - OUT_RELOC(ctx,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(batch, 0); /* VS */ + OUT_BATCH(batch, 0); /* GS */ + OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); } static void gen6_emit_binding_table(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + /* Binding table pointers */ - OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | + OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | GEN6_BINDING_TABLE_MODIFY_PS | (4 - 
2)); - OUT_BATCH(ctx, 0); /* vs */ - OUT_BATCH(ctx, 0); /* gs */ + OUT_BATCH(batch, 0); /* vs */ + OUT_BATCH(batch, 0); /* gs */ /* Only the PS uses the binding table */ - OUT_BATCH(ctx, BINDING_TABLE_OFFSET); + OUT_BATCH(batch, BINDING_TABLE_OFFSET); } static void gen6_emit_depth_buffer_state(VADriverContextP ctx) { - OUT_BATCH(ctx, CMD_DEPTH_BUFFER | (7 - 2)); - OUT_BATCH(ctx, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) | + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2)); + OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) | (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); - OUT_BATCH(ctx, CMD_CLEAR_PARAMS | (2 - 2)); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2)); + OUT_BATCH(batch, 0); } static void @@ -1729,130 +1799,147 @@ gen6_emit_drawing_rectangle(VADriverContextP ctx) static void gen6_emit_vs_state(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + /* disable VS constant buffer */ - OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_VS | (5 - 2)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); - OUT_BATCH(ctx, GEN6_3DSTATE_VS | (6 - 2)); - OUT_BATCH(ctx, 0); /* without VS kernel */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); /* pass-through */ + OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2)); + OUT_BATCH(batch, 0); /* without VS kernel */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ } static void gen6_emit_gs_state(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + /* disable GS constant buffer */ - OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); - OUT_BATCH(ctx, GEN6_3DSTATE_GS | (7 - 2)); - OUT_BATCH(ctx, 0); /* without GS kernel */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); /* pass-through */ + OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2)); + OUT_BATCH(batch, 0); /* without GS kernel */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ } static void gen6_emit_clip_state(VADriverContextP ctx) { - OUT_BATCH(ctx, GEN6_3DSTATE_CLIP | (4 - 2)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); /* pass-through */ - OUT_BATCH(ctx, 0); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); } static void gen6_emit_sf_state(VADriverContextP ctx) { - OUT_BATCH(ctx, GEN6_3DSTATE_SF | (20 - 2)); - OUT_BATCH(ctx, (1 
<< GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) | + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2)); + OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) | (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) | (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, GEN6_3DSTATE_SF_CULL_NONE); - OUT_BATCH(ctx, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); /* DW9 */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); /* DW14 */ - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); /* DW19 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE); + OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* DW9 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* DW14 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* DW19 */ } static void gen6_emit_wm_state(VADriverContextP ctx, int kernel) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_PS | + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); - OUT_RELOC(ctx, + OUT_RELOC(batch, render_state->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); - OUT_BATCH(ctx, GEN6_3DSTATE_WM | (9 - 2)); - OUT_RELOC(ctx, render_state->render_kernels[kernel].bo, + OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2)); + OUT_RELOC(batch, render_state->render_kernels[kernel].bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | + OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ - OUT_BATCH(ctx, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + OUT_BATCH(batch, 0); + OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ + OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | GEN6_3DSTATE_WM_DISPATCH_ENABLE | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); - OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | + OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); } static void gen6_emit_vertex_element_state(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + /* Set up our vertex elements, sourced from the single vertex buffer. 
*/ - OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | (5 - 2)); + OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2)); /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ - OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | (0 << VE0_OFFSET_SHIFT)); - OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ - OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | (8 << VE0_OFFSET_SHIFT)); - OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); @@ -1861,38 +1948,43 @@ gen6_emit_vertex_element_state(VADriverContextP ctx) static void gen6_emit_vertices(VADriverContextP ctx) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - BEGIN_BATCH(ctx, 11); - OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3); - OUT_BATCH(ctx, + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3); + OUT_BATCH(batch, (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | GEN6_VB0_VERTEXDATA | ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); - OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); - OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); - OUT_BATCH(ctx, 0); + OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); + OUT_BATCH(batch, 0); - OUT_BATCH(ctx, + OUT_BATCH(batch, CMD_3DPRIMITIVE | _3DPRIMITIVE_VERTEX_SEQUENTIAL | (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) | (0 << 9) | 4); - OUT_BATCH(ctx, 3); /* vertex count per instance */ - OUT_BATCH(ctx, 0); /* start vertex offset */ - OUT_BATCH(ctx, 1); /* single instance */ - OUT_BATCH(ctx, 0); /* start instance location */ - OUT_BATCH(ctx, 0); /* index buffer offset, ignored */ - ADVANCE_BATCH(ctx); + OUT_BATCH(batch, 3); /* vertex count per instance */ + OUT_BATCH(batch, 0); /* start vertex offset */ + OUT_BATCH(batch, 1); /* single instance */ + OUT_BATCH(batch, 0); /* start instance location */ + OUT_BATCH(batch, 0); /* index buffer offset, ignored */ + ADVANCE_BATCH(batch); } static void gen6_render_emit_states(VADriverContextP ctx, int kernel) { - intel_batchbuffer_start_atomic(ctx, 0x1000); - intel_batchbuffer_emit_mi_flush(ctx); + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); gen6_emit_invarient_states(ctx); gen6_emit_state_base_address(ctx); gen6_emit_viewport_state_pointers(ctx); @@ -1909,7 +2001,7 @@ gen6_render_emit_states(VADriverContextP ctx, int 
kernel) gen6_emit_drawing_rectangle(ctx); gen6_emit_vertex_element_state(ctx); gen6_emit_vertices(ctx); - intel_batchbuffer_end_atomic(ctx); + intel_batchbuffer_end_atomic(batch); } static void @@ -1925,13 +2017,16 @@ gen6_render_put_surface(VADriverContextP ctx, unsigned short desth, unsigned int flag) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; + gen6_render_initialize(ctx); gen6_render_setup_states(ctx, surface, srcx, srcy, srcw, srch, destx, desty, destw, desth); i965_clear_dest_region(ctx); gen6_render_emit_states(ctx, PS_KERNEL); - intel_batchbuffer_flush(ctx); + intel_batchbuffer_flush(batch); } static void @@ -1997,6 +2092,8 @@ gen6_render_put_subpicture(VADriverContextP ctx, unsigned short destw, unsigned short desth) { + struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_batchbuffer *batch = intel->batch; struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_surface *obj_surface = SURFACE(surface); struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); @@ -2008,7 +2105,7 @@ gen6_render_put_subpicture(VADriverContextP ctx, destx, desty, destw, desth); gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL); i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); - intel_batchbuffer_flush(ctx); + intel_batchbuffer_flush(batch); } /* diff --git a/intel_batchbuffer.c b/intel_batchbuffer.c index f4e629a..30d782a 100644 --- a/intel_batchbuffer.c +++ b/intel_batchbuffer.c @@ -45,7 +45,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) dri_bo_unreference(batch->buffer); batch->buffer = dri_bo_alloc(intel->bufmgr, - batch->flag == I915_EXEC_RENDER ? "render batch buffer" : "bsd batch buffer", + "batch buffer", batch_size, 0x1000); assert(batch->buffer); @@ -57,66 +57,47 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->atomic = 0; } -Bool -intel_batchbuffer_init(struct intel_driver_data *intel) +static unsigned int +intel_batchbuffer_space(struct intel_batchbuffer *batch) { - intel->batch = calloc(1, sizeof(*(intel->batch))); - assert(intel->batch); - intel->batch->intel = intel; - intel->batch->flag = I915_EXEC_RENDER; - intel->batch->run = drm_intel_bo_mrb_exec; - intel_batchbuffer_reset(intel->batch); - - if (intel->has_bsd) { - intel->batch_bcs = calloc(1, sizeof(*(intel->batch_bcs))); - assert(intel->batch_bcs); - intel->batch_bcs->intel = intel; - intel->batch_bcs->flag = I915_EXEC_BSD; - intel->batch_bcs->run = drm_intel_bo_mrb_exec; - intel_batchbuffer_reset(intel->batch_bcs); - } - - return True; + return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); } -Bool -intel_batchbuffer_terminate(struct intel_driver_data *intel) + +struct intel_batchbuffer * +intel_batchbuffer_new(struct intel_driver_data *intel, int flag) { - if (intel->batch) { - if (intel->batch->map) { - dri_bo_unmap(intel->batch->buffer); - intel->batch->map = NULL; - } + struct intel_batchbuffer *batch = calloc(1, sizeof(*batch)); + assert(flag == I915_EXEC_RENDER || + flag == I915_EXEC_BSD || + flag == I915_EXEC_BLT); - dri_bo_unreference(intel->batch->buffer); - free(intel->batch); - intel->batch = NULL; - } + batch->intel = intel; + batch->flag = flag; + batch->run = drm_intel_bo_mrb_exec; + intel_batchbuffer_reset(batch); - if (intel->batch_bcs) { - if (intel->batch_bcs->map) { - dri_bo_unmap(intel->batch_bcs->buffer); - intel->batch_bcs->map = NULL; - } + return batch; +} - dri_bo_unreference(intel->batch_bcs->buffer); - free(intel->batch_bcs); - 
intel->batch_bcs = NULL; +void intel_batchbuffer_free(struct intel_batchbuffer *batch) +{ + if (batch->map) { + dri_bo_unmap(batch->buffer); + batch->map = NULL; } - return True; + dri_bo_unreference(batch->buffer); + free(batch); } -static Bool -intel_batchbuffer_flush_helper(VADriverContextP ctx, - struct intel_batchbuffer *batch) +void +intel_batchbuffer_flush(struct intel_batchbuffer *batch) { - struct intel_driver_data *intel = batch->intel; unsigned int used = batch->ptr - batch->map; - int is_locked = intel->locked; if (used == 0) { - return True; + return; } if ((used & 4) == 0) { @@ -128,322 +109,167 @@ intel_batchbuffer_flush_helper(VADriverContextP ctx, batch->ptr += 4; dri_bo_unmap(batch->buffer); used = batch->ptr - batch->map; - - if (!is_locked) - intel_lock_hardware(ctx); - batch->run(batch->buffer, used, 0, 0, 0, batch->flag); - - if (!is_locked) - intel_unlock_hardware(ctx); - intel_batchbuffer_reset(batch); - - return True; -} - -Bool -intel_batchbuffer_flush(VADriverContextP ctx) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - return intel_batchbuffer_flush_helper(ctx, intel->batch); -} - -Bool -intel_batchbuffer_flush_bcs(VADriverContextP ctx) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - return intel_batchbuffer_flush_helper(ctx, intel->batch_bcs); -} - -static unsigned int -intel_batchbuffer_space_helper(struct intel_batchbuffer *batch) -{ - return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); } -static void -intel_batchbuffer_emit_dword_helper(struct intel_batchbuffer *batch, - unsigned int x) +void +intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, unsigned int x) { - assert(intel_batchbuffer_space_helper(batch) >= 4); + assert(intel_batchbuffer_space(batch) >= 4); *(unsigned int *)batch->ptr = x; batch->ptr += 4; } void -intel_batchbuffer_emit_dword(VADriverContextP ctx, unsigned int x) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_emit_dword_helper(intel->batch, x); -} - -void -intel_batchbuffer_emit_dword_bcs(VADriverContextP ctx, unsigned int x) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_emit_dword_helper(intel->batch_bcs, x); -} - -static void -intel_batchbuffer_emit_reloc_helper(VADriverContextP ctx, - struct intel_batchbuffer *batch, - dri_bo *bo, - uint32_t read_domains, uint32_t write_domains, - uint32_t delta) +intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *bo, + uint32_t read_domains, uint32_t write_domains, + uint32_t delta) { assert(batch->ptr - batch->map < batch->size); dri_bo_emit_reloc(batch->buffer, read_domains, write_domains, delta, batch->ptr - batch->map, bo); - intel_batchbuffer_emit_dword_helper(batch, bo->offset + delta); -} - -void -intel_batchbuffer_emit_reloc(VADriverContextP ctx, dri_bo *bo, - uint32_t read_domains, uint32_t write_domains, - uint32_t delta) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_emit_reloc_helper(ctx, intel->batch, - bo, read_domains, write_domains, - delta); + intel_batchbuffer_emit_dword(batch, bo->offset + delta); } void -intel_batchbuffer_emit_reloc_bcs(VADriverContextP ctx, dri_bo *bo, - uint32_t read_domains, uint32_t write_domains, - uint32_t delta) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_emit_reloc_helper(ctx, intel->batch_bcs, - bo, read_domains, write_domains, - delta); -} - -static void -intel_batchbuffer_require_space_helper(VADriverContextP ctx, - 
struct intel_batchbuffer *batch, - unsigned int size) +intel_batchbuffer_require_space(struct intel_batchbuffer *batch, + unsigned int size) { assert(size < batch->size - 8); - if (intel_batchbuffer_space_helper(batch) < size) { - intel_batchbuffer_flush_helper(ctx, batch); + if (intel_batchbuffer_space(batch) < size) { + intel_batchbuffer_flush(batch); } } void -intel_batchbuffer_require_space(VADriverContextP ctx, unsigned int size) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_require_space_helper(ctx, intel->batch, size); -} - -void -intel_batchbuffer_require_space_bcs(VADriverContextP ctx, unsigned int size) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_require_space_helper(ctx, intel->batch_bcs, size); -} - -static void -intel_batchbuffer_data_helper(VADriverContextP ctx, - struct intel_batchbuffer *batch, - void *data, - unsigned int size) +intel_batchbuffer_data(struct intel_batchbuffer *batch, + void *data, + unsigned int size) { assert((size & 3) == 0); - intel_batchbuffer_require_space_helper(ctx, batch, size); + intel_batchbuffer_require_space(batch, size); assert(batch->ptr); memcpy(batch->ptr, data, size); batch->ptr += size; } -void -intel_batchbuffer_data(VADriverContextP ctx, void *data, unsigned int size) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_data_helper(ctx, intel->batch, data, size); -} - -void -intel_batchbuffer_data_bcs(VADriverContextP ctx, void *data, unsigned int size) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_data_helper(ctx, intel->batch_bcs, data, size); -} - void -intel_batchbuffer_emit_mi_flush(VADriverContextP ctx) +intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) { - struct intel_driver_data *intel = intel_driver_data(ctx); - - if (intel->batch->flag == I915_EXEC_BLT) { - BEGIN_BLT_BATCH(ctx, 4); - OUT_BATCH(ctx, MI_FLUSH_DW); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - OUT_BATCH(ctx, 0); - ADVANCE_BATCH(ctx); - } else if (intel->batch->flag == I915_EXEC_RENDER) { - BEGIN_BATCH(ctx, 1); - OUT_BATCH(ctx, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE); - ADVANCE_BATCH(ctx); - } -} - -void -intel_batchbuffer_emit_mi_flush_bcs(VADriverContextP ctx) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); + struct intel_driver_data *intel = batch->intel; if (IS_GEN6(intel->device_id)) { - BEGIN_BCS_BATCH(ctx, 4); - OUT_BCS_BATCH(ctx, MI_FLUSH_DW | MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - OUT_BCS_BATCH(ctx, 0); - ADVANCE_BCS_BATCH(ctx); + if (batch->flag == I915_EXEC_RENDER) { + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2); + OUT_BATCH(batch, + CMD_PIPE_CONTROL_WC_FLUSH | + CMD_PIPE_CONTROL_TC_FLUSH | + CMD_PIPE_CONTROL_NOWRITE); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + } else { + if (batch->flag == I915_EXEC_BLT) { + BEGIN_BLT_BATCH(batch, 4); + OUT_BLT_BATCH(batch, MI_FLUSH_DW); + OUT_BLT_BATCH(batch, 0); + OUT_BLT_BATCH(batch, 0); + OUT_BLT_BATCH(batch, 0); + ADVANCE_BLT_BATCH(batch); + } else { + assert(batch->flag == I915_EXEC_BSD); + BEGIN_BCS_BATCH(batch, 4); + OUT_BCS_BATCH(batch, MI_FLUSH_DW | MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); + } + } } else { - BEGIN_BCS_BATCH(ctx, 1); - OUT_BCS_BATCH(ctx, MI_FLUSH | 
MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE); - ADVANCE_BCS_BATCH(ctx); + if (batch->flag == I915_EXEC_RENDER) { + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE); + ADVANCE_BATCH(batch); + } else { + assert(batch->flag == I915_EXEC_BSD); + BEGIN_BCS_BATCH(batch, 1); + OUT_BCS_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE); + ADVANCE_BCS_BATCH(batch); + } } } void -intel_batchbuffer_start_atomic_helper(VADriverContextP ctx, - struct intel_batchbuffer *batch, - unsigned int size) +intel_batchbuffer_begin_batch(struct intel_batchbuffer *batch, int total) { - assert(!batch->atomic); - intel_batchbuffer_require_space_helper(ctx, batch, size); - batch->atomic = 1; -} - -void -intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - intel_batchbuffer_check_batchbuffer_flag(ctx, I915_EXEC_RENDER); - intel_batchbuffer_start_atomic_helper(ctx, intel->batch, size); + batch->emit_total = total * 4; + batch->emit_start = batch->ptr; } void -intel_batchbuffer_start_atomic_bcs(VADriverContextP ctx, unsigned int size) +intel_batchbuffer_advance_batch(struct intel_batchbuffer *batch) { - struct intel_driver_data *intel = intel_driver_data(ctx); - intel_batchbuffer_start_atomic_helper(ctx, intel->batch_bcs, size); + assert(batch->emit_total == (batch->ptr - batch->emit_start)); } void -intel_batchbuffer_end_atomic_helper(struct intel_batchbuffer *batch) +intel_batchbuffer_check_batchbuffer_flag(struct intel_batchbuffer *batch, int flag) { - assert(batch->atomic); - batch->atomic = 0; -} + if (flag != I915_EXEC_RENDER && + flag != I915_EXEC_BLT && + flag != I915_EXEC_BSD) + return; -void -intel_batchbuffer_end_atomic(VADriverContextP ctx) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); + if (batch->flag == flag) + return; - intel_batchbuffer_end_atomic_helper(intel->batch); + intel_batchbuffer_flush(batch); + batch->flag = flag; } -void -intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx) +int +intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size) { - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_end_atomic_helper(intel->batch_bcs); + return intel_batchbuffer_space(batch) >= size; } static void -intel_batchbuffer_begin_batch_helper(struct intel_batchbuffer *batch, int total) -{ - batch->emit_total = total * 4; - batch->emit_start = batch->ptr; -} - -void -intel_batchbuffer_begin_batch(VADriverContextP ctx, int total) +intel_batchbuffer_start_atomic_helper(struct intel_batchbuffer *batch, + int flag, + unsigned int size) { - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_begin_batch_helper(intel->batch, total); + assert(!batch->atomic); + intel_batchbuffer_check_batchbuffer_flag(batch, flag); + intel_batchbuffer_require_space(batch, size); + batch->atomic = 1; } void -intel_batchbuffer_begin_batch_bcs(VADriverContextP ctx, int total) -{ - struct intel_driver_data *intel = intel_driver_data(ctx); - - intel_batchbuffer_begin_batch_helper(intel->batch_bcs, total); -} - -static void -intel_batchbuffer_advance_batch_helper(struct intel_batchbuffer *batch) +intel_batchbuffer_start_atomic(struct intel_batchbuffer *batch, unsigned int size) { - assert(batch->emit_total == (batch->ptr - batch->emit_start)); + intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_RENDER, size); } void -intel_batchbuffer_advance_batch(VADriverContextP ctx) 
+intel_batchbuffer_start_atomic_blt(struct intel_batchbuffer *batch, unsigned int size)
 {
-    struct intel_driver_data *intel = intel_driver_data(ctx);
-
-    intel_batchbuffer_advance_batch_helper(intel->batch);
+    intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_BLT, size);
 }
 
 void
-intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx)
+intel_batchbuffer_start_atomic_bcs(struct intel_batchbuffer *batch, unsigned int size)
 {
-    struct intel_driver_data *intel = intel_driver_data(ctx);
-
-    intel_batchbuffer_advance_batch_helper(intel->batch_bcs);
+    intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_BSD, size);
 }
 
 void
-intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag)
+intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch)
 {
-    struct intel_driver_data *intel = intel_driver_data(ctx);
-
-    if (flag != I915_EXEC_RENDER &&
-        flag != I915_EXEC_BLT &&
-        flag != I915_EXEC_BSD)
-        return;
-
-    if (intel->batch->flag == flag)
-        return;
-
-    intel_batchbuffer_flush_helper(ctx, intel->batch);
-    intel->batch->flag = flag;
-}
-
-int
-intel_batchbuffer_check_free_space(VADriverContextP ctx, int size)
-{
-    struct intel_driver_data *intel = intel_driver_data(ctx);
-
-    return intel_batchbuffer_space_helper(intel->batch) >= size;
-}
-
-int
-intel_batchbuffer_check_free_space_bcs(VADriverContextP ctx, int size)
-{
-    struct intel_driver_data *intel = intel_driver_data(ctx);
-
-    return intel_batchbuffer_space_helper(intel->batch_bcs) >= size;
+    assert(batch->atomic);
+    batch->atomic = 0;
 }
diff --git a/intel_batchbuffer.h b/intel_batchbuffer.h
index 6df8d45..092da5a 100644
--- a/intel_batchbuffer.h
+++ b/intel_batchbuffer.h
@@ -26,82 +26,65 @@ struct intel_batchbuffer
                   int DR4, unsigned int ring_flag);
 };
 
-Bool intel_batchbuffer_init(struct intel_driver_data *intel);
-Bool intel_batchbuffer_terminate(struct intel_driver_data *intel);
-
-void intel_batchbuffer_emit_dword(VADriverContextP ctx, unsigned int x);
-void intel_batchbuffer_emit_reloc(VADriverContextP ctx, dri_bo *bo,
+struct intel_batchbuffer *intel_batchbuffer_new(struct intel_driver_data *intel, int flag);
+void intel_batchbuffer_free(struct intel_batchbuffer *batch);
+void intel_batchbuffer_start_atomic(struct intel_batchbuffer *batch, unsigned int size);
+void intel_batchbuffer_start_atomic_bcs(struct intel_batchbuffer *batch, unsigned int size);
+void intel_batchbuffer_start_atomic_blt(struct intel_batchbuffer *batch, unsigned int size);
+void intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch);
+void intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, unsigned int x);
+void intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *bo,
                                   uint32_t read_domains, uint32_t write_domains,
                                   uint32_t delta);
-void intel_batchbuffer_require_space(VADriverContextP ctx, unsigned int size);
-void intel_batchbuffer_data(VADriverContextP ctx, void *data, unsigned int size);
-void intel_batchbuffer_emit_mi_flush(VADriverContextP ctx);
-void intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size);
-void intel_batchbuffer_end_atomic(VADriverContextP ctx);
-Bool intel_batchbuffer_flush(VADriverContextP ctx);
-
-void intel_batchbuffer_begin_batch(VADriverContextP ctx, int total);
-void intel_batchbuffer_advance_batch(VADriverContextP ctx);
-
-void intel_batchbuffer_emit_dword_bcs(VADriverContextP ctx, unsigned int x);
-void intel_batchbuffer_emit_reloc_bcs(VADriverContextP ctx, dri_bo *bo,
-                                      uint32_t read_domains, uint32_t write_domains,
-                                      uint32_t delta);
-void intel_batchbuffer_require_space_bcs(VADriverContextP ctx, unsigned int size);
-void intel_batchbuffer_data_bcs(VADriverContextP ctx, void *data, unsigned int size);
-void intel_batchbuffer_emit_mi_flush_bcs(VADriverContextP ctx);
-void intel_batchbuffer_start_atomic_bcs(VADriverContextP ctx, unsigned int size);
-void intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx);
-Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
-
-void intel_batchbuffer_begin_batch_bcs(VADriverContextP ctx, int total);
-void intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx);
-
-void intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag);
-
-int intel_batchbuffer_check_free_space(VADriverContextP ctx, int size);
-int intel_batchbuffer_check_free_space_bcs(VADriverContextP ctx, int size);
-
-#define __BEGIN_BATCH(ctx, n, flag) do {                        \
-    intel_batchbuffer_check_batchbuffer_flag(ctx, flag);        \
-    intel_batchbuffer_require_space(ctx, (n) * 4);              \
-    intel_batchbuffer_begin_batch(ctx, (n));                    \
+void intel_batchbuffer_require_space(struct intel_batchbuffer *batch, unsigned int size);
+void intel_batchbuffer_data(struct intel_batchbuffer *batch, void *data, unsigned int size);
+void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch);
+void intel_batchbuffer_flush(struct intel_batchbuffer *batch);
+void intel_batchbuffer_begin_batch(struct intel_batchbuffer *batch, int total);
+void intel_batchbuffer_advance_batch(struct intel_batchbuffer *batch);
+void intel_batchbuffer_check_batchbuffer_flag(struct intel_batchbuffer *batch, int flag);
+int intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size);
+
+#define __BEGIN_BATCH(batch, n, f) do {                         \
+        assert(f == batch->flag);                               \
+        intel_batchbuffer_check_batchbuffer_flag(batch, f);     \
+        intel_batchbuffer_require_space(batch, (n) * 4);        \
+        intel_batchbuffer_begin_batch(batch, (n));              \
+    } while (0)
+
+#define __OUT_BATCH(batch, d) do {                              \
+        intel_batchbuffer_emit_dword(batch, d);                 \
 } while (0)
 
-#define BEGIN_BATCH(ctx, n) __BEGIN_BATCH(ctx, n, I915_EXEC_RENDER)
-#define BEGIN_BLT_BATCH(ctx, n) __BEGIN_BATCH(ctx, n, I915_EXEC_BLT)
-
-#define OUT_BATCH(ctx, d) do {                                  \
-    intel_batchbuffer_emit_dword(ctx, d);                       \
-} while (0)
-
-#define OUT_RELOC(ctx, bo, read_domains, write_domain, delta) do {     \
-    assert((delta) >= 0);                                               \
-    intel_batchbuffer_emit_reloc(ctx, bo,                               \
-                                 read_domains, write_domain, delta);    \
-} while (0)
-
-#define ADVANCE_BATCH(ctx) do {                                 \
-    intel_batchbuffer_advance_batch(ctx);                       \
-} while (0)
-
-#define BEGIN_BCS_BATCH(ctx, n) do {                            \
-    intel_batchbuffer_require_space_bcs(ctx, (n) * 4);          \
-    intel_batchbuffer_begin_batch_bcs(ctx, (n));                \
-} while (0)
-
-#define OUT_BCS_BATCH(ctx, d) do {                              \
-    intel_batchbuffer_emit_dword_bcs(ctx, d);                   \
-} while (0)
-
-#define OUT_BCS_RELOC(ctx, bo, read_domains, write_domain, delta) do {  \
-    assert((delta) >= 0);                                               \
-    intel_batchbuffer_emit_reloc_bcs(ctx, bo,                           \
-                                     read_domains, write_domain, delta); \
-} while (0)
-
-#define ADVANCE_BCS_BATCH(ctx) do {                             \
-    intel_batchbuffer_advance_batch_bcs(ctx);                   \
-} while (0)
+#define __OUT_RELOC(batch, bo, read_domains, write_domain, delta) do { \
+        assert((delta) >= 0);                                   \
+        intel_batchbuffer_emit_reloc(batch, bo,                 \
+                                     read_domains, write_domain, \
+                                     delta);                    \
+    } while (0)
+
+#define __ADVANCE_BATCH(batch) do {                             \
+        intel_batchbuffer_advance_batch(batch);                 \
+    } while (0)
+
+#define BEGIN_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_RENDER)
+#define BEGIN_BLT_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_BLT)
+#define BEGIN_BCS_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_BSD)
+
+
+#define OUT_BATCH(batch, d) __OUT_BATCH(batch, d)
+#define OUT_BLT_BATCH(batch, d) __OUT_BATCH(batch, d)
+#define OUT_BCS_BATCH(batch, d) __OUT_BATCH(batch, d)
+
+#define OUT_RELOC(batch, bo, read_domains, write_domain, delta) \
+    __OUT_RELOC(batch, bo, read_domains, write_domain, delta)
+#define OUT_BLT_RELOC(batch, bo, read_domains, write_domain, delta) \
+    __OUT_RELOC(batch, bo, read_domains, write_domain, delta)
+#define OUT_BCS_RELOC(batch, bo, read_domains, write_domain, delta) \
+    __OUT_RELOC(batch, bo, read_domains, write_domain, delta)
+
+#define ADVANCE_BATCH(batch) __ADVANCE_BATCH(batch)
+#define ADVANCE_BLT_BATCH(batch) __ADVANCE_BATCH(batch)
+#define ADVANCE_BCS_BATCH(batch) __ADVANCE_BATCH(batch)
 
 #endif /* _INTEL_BATCHBUFFER_H_ */
diff --git a/intel_driver.c b/intel_driver.c
index bf7ecbe..da4adf0 100644
--- a/intel_driver.c
+++ b/intel_driver.c
@@ -81,8 +81,7 @@ intel_driver_init(VADriverContextP ctx)
     intel->has_blt = has_blt;
 
     intel_memman_init(intel);
-    intel_batchbuffer_init(intel);
-
+    intel->batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
     return True;
 }
 
@@ -92,8 +91,7 @@ intel_driver_terminate(VADriverContextP ctx)
     struct intel_driver_data *intel = intel_driver_data(ctx);
 
     intel_memman_terminate(intel);
-    intel_batchbuffer_terminate(intel);
-
+    intel_batchbuffer_free(intel->batch);
     pthread_mutex_destroy(&intel->ctxmutex);
 
     return True;
diff --git a/intel_driver.h b/intel_driver.h
index b3f7460..03877c8 100644
--- a/intel_driver.h
+++ b/intel_driver.h
@@ -22,6 +22,7 @@
 
 #define CMD_MI                          (0x0 << 29)
 #define CMD_2D                          (0x2 << 29)
+#define CMD_3D                          (0x3 << 29)
 
 #define MI_NOOP                         (CMD_MI | 0)
 
@@ -43,6 +44,21 @@
 #define BR13_565                        (0x1 << 24)
 #define BR13_8888                       (0x3 << 24)
 
+#define CMD_PIPE_CONTROL                (CMD_3D | (3 << 27) | (2 << 24) | (0 << 16))
+#define CMD_PIPE_CONTROL_NOWRITE        (0 << 14)
+#define CMD_PIPE_CONTROL_WRITE_QWORD    (1 << 14)
+#define CMD_PIPE_CONTROL_WRITE_DEPTH    (2 << 14)
+#define CMD_PIPE_CONTROL_WRITE_TIME     (3 << 14)
+#define CMD_PIPE_CONTROL_DEPTH_STALL    (1 << 13)
+#define CMD_PIPE_CONTROL_WC_FLUSH       (1 << 12)
+#define CMD_PIPE_CONTROL_IS_FLUSH       (1 << 11)
+#define CMD_PIPE_CONTROL_TC_FLUSH       (1 << 10)
+#define CMD_PIPE_CONTROL_NOTIFY_ENABLE  (1 << 8)
+#define CMD_PIPE_CONTROL_GLOBAL_GTT     (1 << 2)
+#define CMD_PIPE_CONTROL_LOCAL_PGTT     (0 << 2)
+#define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+
+
 struct intel_batchbuffer;
 
 #define ALIGN(i, n) (((i) + (n) - 1) & ~((n) - 1))
 
@@ -90,7 +106,6 @@ struct intel_driver_data
     int locked;
 
     struct intel_batchbuffer *batch;
-    struct intel_batchbuffer *batch_bcs;
     dri_bufmgr *bufmgr;
 
     unsigned int has_exec2  : 1; /* Flag: has execbuffer2? */
-- 
2.7.4
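
For reviewers, the sketch below illustrates how a caller drives the reworked
interface: the target ring is now a property of the batchbuffer object, so
every call takes a struct intel_batchbuffer * instead of the driver context.
It is illustrative only and not part of the patch; it assumes the declarations
from intel_batchbuffer.h and intel_driver.h above, and that intel->batch was
created on the render ring as intel_driver_init() now does. The function name
and the 0x100 reservation size are hypothetical.

/* Illustrative sketch only, not part of the patch: emit a pipeline
 * flush and one MI_NOOP on the render ring with the new interface. */
#include <assert.h>
#include "intel_driver.h"
#include "intel_batchbuffer.h"

static void
example_render_ring_usage(VADriverContextP ctx)
{
    struct intel_driver_data *intel = intel_driver_data(ctx);
    struct intel_batchbuffer *batch = intel->batch;

    /* Reserve space up front so the buffer cannot be flushed between
     * BEGIN_BATCH() and ADVANCE_BATCH(). */
    intel_batchbuffer_start_atomic(batch, 0x100);

    intel_batchbuffer_emit_mi_flush(batch);

    BEGIN_BATCH(batch, 1);      /* asserts batch->flag == I915_EXEC_RENDER */
    OUT_BATCH(batch, MI_NOOP);
    ADVANCE_BATCH(batch);       /* asserts exactly 1 dword was emitted */

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);     /* submit to the kernel */
}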