From e90a3c3a41a07734163852ff9868336d5ec121b0 Mon Sep 17 00:00:00 2001
From: Zhao Yakui
Date: Fri, 13 Dec 2013 17:03:47 +0800
Subject: [PATCH] Follow the spec to make BDW encoding media pipeline command
 support 48-bit addressing mode

Stop emitting a relocation into every interface descriptor and address
the media pipeline state by offset instead:

- gen8_gpe_load_kernels() copies all kernels into one "kernel shader"
  buffer object (instruction_state.bo), each kernel starting on a
  64-byte boundary, and records the start in i965_kernel.kernel_offset.
  The interface descriptor stores kernel_offset >> 6.
- gen8_gpe_context_init() allocates one dynamic-state buffer object that
  holds the CURBE data, the interface descriptors and the sampler state,
  each region starting on a 64-byte boundary (curbe_offset, idrt_offset,
  sampler_offset).
- gen8_gpe_state_base_address() emits a relocation for the dynamic,
  indirect and instruction base address when the matching buffer object
  exists, and falls back to a zero base otherwise.
- gen8_gpe_curbe_load() and gen8_gpe_idrt() emit the size and offset of
  the CURBE / interface descriptor region.
- gen8_vme.c switches to the gen8 helpers and writes its descriptors and
  constants at idrt_offset / curbe_offset inside dynamic_state.bo.

Signed-off-by: Zhao Yakui
---
Review notes:
 - end_offset is now advanced from the aligned start of each region and
   of each kernel. Adding the size to the unaligned running offset let
   the next 64-byte aligned start land inside the previous entry
   whenever a size was not a multiple of 64.
 - The dynamic-state buffer object is named "dynamic state" instead of
   reusing "surface state & binding table".
 - Dropped the unused local in gen8_gpe_context_destroy(), made the
   kernel loop index unsigned to match num_kernels, and removed an
   assert that could never fire after the NULL check.
 - All changes are 1:1 line replacements, so hunk sizes and the diffstat
   below are unchanged. Leading whitespace of context lines was
   reconstructed; apply with --ignore-whitespace if it does not match.

 src/gen8_vme.c       |  37 +++++-----
 src/i965_drv_video.h |   1 +
 src/i965_gpe_utils.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++--
 src/i965_gpe_utils.h |  33 +++++++++
 4 files changed, 234 insertions(+), 24 deletions(-)

diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index 464c344..5369b31 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -278,11 +278,14 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
     struct gen8_interface_descriptor_data *desc;
     int i;
     dri_bo *bo;
+    unsigned char *desc_ptr;
 
-    bo = vme_context->gpe_context.idrt.bo;
+    bo = vme_context->gpe_context.dynamic_state.bo;
     dri_bo_map(bo, 1);
     assert(bo->virtual);
-    desc = bo->virtual;
+    desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+
+    desc = (struct gen8_interface_descriptor_data *)desc_ptr;
 
     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
         struct i965_kernel *kernel;
@@ -290,7 +293,7 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
         assert(sizeof(*desc) == 32);
         /*Setup the descritor table*/
         memset(desc, 0, sizeof(*desc));
-        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
+        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
         desc->desc3.sampler_count = 0; /* FIXME: */
         desc->desc3.sampler_state_pointer = 0;
         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
@@ -299,14 +302,9 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
 
 
-        /*kernel start*/
-        dri_bo_emit_reloc(bo,
-                          I915_GEM_DOMAIN_INSTRUCTION, 0,
-                          0,
-                          i * sizeof(*desc) + offsetof(struct gen8_interface_descriptor_data, desc0),
-                          kernel->bo);
         desc++;
     }
+
     dri_bo_unmap(bo);
 
     return VA_STATUS_SUCCESS;
@@ -336,9 +334,10 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
 
     vme_state_message[31] = mv_num;
 
-    dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
-    assert(vme_context->gpe_context.curbe.bo->virtual);
-    constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
+    dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
+    assert(vme_context->gpe_context.dynamic_state.bo->virtual);
+    constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
+                                         vme_context->gpe_context.curbe_offset;
 
     /* VME MV/Mb cost table is passed by using const buffer */
     /* Now it uses the fixed search path. So it is constructed directly
@@ -346,7 +345,7 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
      */
     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
 
-    dri_bo_unmap(vme_context->gpe_context.curbe.bo);
+    dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
 
     return VA_STATUS_SUCCESS;
 }
@@ -575,7 +574,7 @@ static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_conte
 {
     struct gen6_vme_context *vme_context = encoder_context->vme_context;
 
-    i965_gpe_context_init(ctx, &vme_context->gpe_context);
+    gen8_gpe_context_init(ctx, &vme_context->gpe_context);
 
     /* VME output buffer */
     dri_bo_unreference(vme_context->vme_output.bo);
@@ -1129,7 +1128,7 @@ gen8_vme_context_destroy(void *context)
 {
     struct gen6_vme_context *vme_context = context;
 
-    i965_gpe_context_destroy(&vme_context->gpe_context);
+    gen8_gpe_context_destroy(&vme_context->gpe_context);
 
     dri_bo_unreference(vme_context->vme_output.bo);
     vme_context->vme_output.bo = NULL;
@@ -1177,10 +1176,10 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
     vme_context->vme_kernel_sum = i965_kernel_num;
     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
 
-    vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
-    vme_context->gpe_context.idrt.entry_size = sizeof(struct gen8_interface_descriptor_data);
+    vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+    vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+    vme_context->gpe_context.sampler_size = 0;
 
-    vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
 
     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
     vme_context->gpe_context.vfe_state.num_urb_entries = 16;
@@ -1190,7 +1189,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
 
     gen7_vme_scoreboard_init(ctx, vme_context);
 
-    i965_gpe_load_kernels(ctx,
+    gen8_gpe_load_kernels(ctx,
                           &vme_context->gpe_context,
                           vme_kernel_list,
                           i965_kernel_num);
diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
index 1a101f4..98e08fe 100644
--- a/src/i965_drv_video.h
+++ b/src/i965_drv_video.h
@@ -78,6 +78,7 @@ struct i965_kernel
     const uint32_t (*bin)[4];
     int size;
     dri_bo *bo;
+    unsigned int kernel_offset;
 };
 
 struct buffer_store
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 2af323f..0d49703 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -949,18 +949,39 @@ gen8_gpe_state_base_address(VADriverContextP ctx,
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch, 0);
+
     /*DW4 Surface state base address */
     OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
     OUT_BATCH(batch, 0);
+
     /*DW6. Dynamic state base address */
-    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Dynamic State Base Address
+    if (gpe_context->dynamic_state.bo)
+        OUT_RELOC(batch, gpe_context->dynamic_state.bo,
+                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
+                  0, BASE_ADDRESS_MODIFY);
+    else
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
     OUT_BATCH(batch, 0);
 
     /*DW8. Indirect Object base address */
-    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Indirect Object Base Address
+    if (gpe_context->indirect_state.bo)
+        OUT_RELOC(batch, gpe_context->indirect_state.bo,
+                  I915_GEM_DOMAIN_SAMPLER,
+                  0, BASE_ADDRESS_MODIFY);
+    else
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
     OUT_BATCH(batch, 0);
+
     /*DW10. Instruct base address */
-    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //Instruction Base Address
+    if (gpe_context->instruction_state.bo)
+        OUT_RELOC(batch, gpe_context->instruction_state.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION,
+                  0, BASE_ADDRESS_MODIFY);
+    else
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
     OUT_BATCH(batch, 0);
 
     /* DW12. Size limitation */
@@ -1008,6 +1029,38 @@ gen8_gpe_vfe_state(VADriverContextP ctx,
 
 }
 
+
+static void
+gen8_gpe_curbe_load(VADriverContextP ctx,
+                    struct i965_gpe_context *gpe_context,
+                    struct intel_batchbuffer *batch)
+{
+    BEGIN_BATCH(batch, 4);
+
+    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, gpe_context->curbe_size);
+    OUT_BATCH(batch, gpe_context->curbe_offset);
+
+    ADVANCE_BATCH(batch);
+}
+
+static void
+gen8_gpe_idrt(VADriverContextP ctx,
+              struct i965_gpe_context *gpe_context,
+              struct intel_batchbuffer *batch)
+{
+    BEGIN_BATCH(batch, 4);
+
+    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, gpe_context->idrt_size);
+    OUT_BATCH(batch, gpe_context->idrt_offset);
+
+    ADVANCE_BATCH(batch);
+}
+
+
 void
 gen8_gpe_pipeline_setup(VADriverContextP ctx,
                         struct i965_gpe_context *gpe_context,
@@ -1018,7 +1071,131 @@ gen8_gpe_pipeline_setup(VADriverContextP ctx,
     i965_gpe_select(ctx, gpe_context, batch);
     gen8_gpe_state_base_address(ctx, gpe_context, batch);
     gen8_gpe_vfe_state(ctx, gpe_context, batch);
-    gen6_gpe_curbe_load(ctx, gpe_context, batch);
-    gen6_gpe_idrt(ctx, gpe_context, batch);
+    gen8_gpe_curbe_load(ctx, gpe_context, batch);
+    gen8_gpe_idrt(ctx, gpe_context, batch);
+}
+
+void
+gen8_gpe_context_init(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    dri_bo *bo;
+    int bo_size;
+    unsigned int end_offset;
+
+    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      gpe_context->surface_state_binding_table.length,
+                      4096);
+    assert(bo);
+    gpe_context->surface_state_binding_table.bo = bo;
+
+    bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
+    dri_bo_unreference(gpe_context->dynamic_state.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "dynamic state",
+                      bo_size,
+                      4096);
+    assert(bo);
+    gpe_context->dynamic_state.bo = bo;
+    gpe_context->dynamic_state.bo_size = bo_size;
+
+    end_offset = 0;
+    gpe_context->dynamic_state.end_offset = 0;
+
+    /* Constant buffer: 64-byte aligned start, end counted from that start */
+    gpe_context->curbe_offset = ALIGN(end_offset, 64);
+    end_offset = gpe_context->curbe_offset + gpe_context->curbe_size;
+
+    /* Interface descriptors: 64-byte aligned start after the constant buffer */
+    gpe_context->idrt_offset = ALIGN(end_offset, 64);
+    end_offset = gpe_context->idrt_offset + gpe_context->idrt_size;
+
+    /* Sampler state: 64-byte aligned start after the interface descriptors */
+    gpe_context->sampler_offset = ALIGN(end_offset, 64);
+    end_offset = gpe_context->sampler_offset + gpe_context->sampler_size;
+
+    /* update the end offset of dynamic_state */
+    gpe_context->dynamic_state.end_offset = end_offset;
+}
+
+
+void
+gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
+{
+    /* Drop the reference held on each state buffer object and clear the pointer. */
+
+    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
+    gpe_context->surface_state_binding_table.bo = NULL;
+
+    dri_bo_unreference(gpe_context->instruction_state.bo);
+    gpe_context->instruction_state.bo = NULL;
+
+    dri_bo_unreference(gpe_context->dynamic_state.bo);
+    gpe_context->dynamic_state.bo = NULL;
+
+    dri_bo_unreference(gpe_context->indirect_state.bo);
+    gpe_context->indirect_state.bo = NULL;
+
+}
+
+
+void
+gen8_gpe_load_kernels(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context,
+                      struct i965_kernel *kernel_list,
+                      unsigned int num_kernels)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int kernel_size;
+    unsigned int i, kernel_offset, end_offset;
+    unsigned char *kernel_ptr;
+    struct i965_kernel *kernel;
+
+    assert(num_kernels <= MAX_GPE_KERNELS);
+    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
+    gpe_context->num_kernels = num_kernels;
+
+    kernel_size = num_kernels * 64;
+    for (i = 0; i < num_kernels; i++) {
+        kernel = &gpe_context->kernels[i];
+
+        kernel_size += kernel->size;
+    }
+
+    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                     "kernel shader",
+                                                     kernel_size,
+                                                     0x1000);
+    if (gpe_context->instruction_state.bo == NULL) {
+        WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
+        return;
+    }
+
+    /* Copy the kernels into the buffer, each one starting on a 64-byte boundary. */
+
+    gpe_context->instruction_state.bo_size = kernel_size;
+    gpe_context->instruction_state.end_offset = 0;
+    end_offset = 0;
+
+    dri_bo_map(gpe_context->instruction_state.bo, 1);
+    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
+    for (i = 0; i < num_kernels; i++) {
+        kernel_offset = ALIGN(end_offset, 64);
+        kernel = &gpe_context->kernels[i];
+        kernel->kernel_offset = kernel_offset;
+
+        memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
+
+        end_offset = kernel_offset + kernel->size;
+    }
+
+    gpe_context->instruction_state.end_offset = end_offset;
+
+    dri_bo_unmap(gpe_context->instruction_state.bo);
+
+    return;
 }
 
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index b96916d..2331152 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -114,6 +114,29 @@ struct i965_gpe_context
 
     unsigned int num_kernels;
     struct i965_kernel kernels[MAX_GPE_KERNELS];
+
+    struct {
+        dri_bo *bo;
+        int bo_size;
+        unsigned int end_offset;
+    } instruction_state;
+
+    struct {
+        dri_bo *bo;
+    } indirect_state;
+
+    struct {
+        dri_bo *bo;
+        int bo_size;
+        unsigned int end_offset;
+    } dynamic_state;
+
+    unsigned int sampler_offset;
+    int sampler_size;
+    unsigned int idrt_offset;
+    int idrt_size;
+    unsigned int curbe_offset;
+    int curbe_size;
 };
 
 void i965_gpe_context_destroy(struct i965_gpe_context *gpe_context);
@@ -186,4 +209,14 @@ extern void gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
 void gen8_gpe_pipeline_setup(VADriverContextP ctx,
                              struct i965_gpe_context *gpe_context,
                              struct intel_batchbuffer *batch);
+
+
+void gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context);
+void gen8_gpe_context_init(VADriverContextP ctx,
+                           struct i965_gpe_context *gpe_context);
+
+void gen8_gpe_load_kernels(VADriverContextP ctx,
+                           struct i965_gpe_context *gpe_context,
+                           struct i965_kernel *kernel_list,
+                           unsigned int num_kernels);
 #endif /* _I965_GPE_UTILS_H_ */
-- 
2.7.4