Follow the spec to make the BDW encoding media pipeline commands support 48-bit addressing...
authorZhao Yakui <yakui.zhao@intel.com>
Fri, 13 Dec 2013 09:03:47 +0000 (17:03 +0800)
committerXiang, Haihao <haihao.xiang@intel.com>
Thu, 27 Feb 2014 02:22:45 +0000 (10:22 +0800)
Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
src/gen8_vme.c
src/i965_drv_video.h
src/i965_gpe_utils.c
src/i965_gpe_utils.h

index 464c344..5369b31 100644 (file)
@@ -278,11 +278,14 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
     struct gen8_interface_descriptor_data *desc;   
     int i;
     dri_bo *bo;
+    unsigned char *desc_ptr;
 
-    bo = vme_context->gpe_context.idrt.bo;
+    bo = vme_context->gpe_context.dynamic_state.bo;
     dri_bo_map(bo, 1);
     assert(bo->virtual);
-    desc = bo->virtual;
+    desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+
+    desc = (struct gen8_interface_descriptor_data *)desc_ptr;
 
     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
         struct i965_kernel *kernel;
@@ -290,7 +293,7 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
         assert(sizeof(*desc) == 32);
         /*Setup the descritor table*/
         memset(desc, 0, sizeof(*desc));
-        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
+        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
         desc->desc3.sampler_count = 0; /* FIXME: */
         desc->desc3.sampler_state_pointer = 0;
         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
@@ -299,14 +302,9 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
 
                
-        /*kernel start*/
-        dri_bo_emit_reloc(bo,  
-                          I915_GEM_DOMAIN_INSTRUCTION, 0,
-                          0,
-                          i * sizeof(*desc) + offsetof(struct gen8_interface_descriptor_data, desc0),
-                          kernel->bo);
         desc++;
     }
+
     dri_bo_unmap(bo);
 
     return VA_STATUS_SUCCESS;
@@ -336,9 +334,10 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
 
     vme_state_message[31] = mv_num;
 
-    dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
-    assert(vme_context->gpe_context.curbe.bo->virtual);
-    constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
+    dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
+    assert(vme_context->gpe_context.dynamic_state.bo->virtual);
+    constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
+                                         vme_context->gpe_context.curbe_offset;
 
     /* VME MV/Mb cost table is passed by using const buffer */
     /* Now it uses the fixed search path. So it is constructed directly
@@ -346,7 +345,7 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
      */
     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
        
-    dri_bo_unmap(vme_context->gpe_context.curbe.bo);
+    dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
 
     return VA_STATUS_SUCCESS;
 }
@@ -575,7 +574,7 @@ static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_conte
 {
     struct gen6_vme_context *vme_context = encoder_context->vme_context;
 
-    i965_gpe_context_init(ctx, &vme_context->gpe_context);
+    gen8_gpe_context_init(ctx, &vme_context->gpe_context);
 
     /* VME output buffer */
     dri_bo_unreference(vme_context->vme_output.bo);
@@ -1129,7 +1128,7 @@ gen8_vme_context_destroy(void *context)
 {
     struct gen6_vme_context *vme_context = context;
 
-    i965_gpe_context_destroy(&vme_context->gpe_context);
+    gen8_gpe_context_destroy(&vme_context->gpe_context);
 
     dri_bo_unreference(vme_context->vme_output.bo);
     vme_context->vme_output.bo = NULL;
@@ -1177,10 +1176,10 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
     vme_context->vme_kernel_sum = i965_kernel_num;
     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
 
-    vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
-    vme_context->gpe_context.idrt.entry_size = sizeof(struct gen8_interface_descriptor_data);
+    vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+    vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+    vme_context->gpe_context.sampler_size = 0;
 
-    vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
 
     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
     vme_context->gpe_context.vfe_state.num_urb_entries = 16;
@@ -1190,7 +1189,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
 
     gen7_vme_scoreboard_init(ctx, vme_context);
 
-    i965_gpe_load_kernels(ctx,
+    gen8_gpe_load_kernels(ctx,
                           &vme_context->gpe_context,
                           vme_kernel_list,
                           i965_kernel_num);
index 1a101f4..98e08fe 100644 (file)
@@ -78,6 +78,7 @@ struct i965_kernel
     const uint32_t (*bin)[4];
     int size;
     dri_bo *bo;
+    unsigned int kernel_offset;
 };
 
 struct buffer_store
index 2af323f..0d49703 100644 (file)
@@ -949,18 +949,39 @@ gen8_gpe_state_base_address(VADriverContextP ctx,
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                         //General State Base Address
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch, 0);
+
        /*DW4 Surface state base address */
     OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
     OUT_BATCH(batch, 0);
+
        /*DW6. Dynamic state base address */
-    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                         //Dynamic State Base Address
+    if (gpe_context->dynamic_state.bo)
+        OUT_RELOC(batch, gpe_context->dynamic_state.bo,
+                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
+                  0, BASE_ADDRESS_MODIFY);
+    else
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
     OUT_BATCH(batch, 0);
 
        /*DW8. Indirect Object base address */
-    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                         //Indirect Object Base Address
+    if (gpe_context->indirect_state.bo)
+        OUT_RELOC(batch, gpe_context->indirect_state.bo,
+                  I915_GEM_DOMAIN_SAMPLER,
+                  0, BASE_ADDRESS_MODIFY);
+    else
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
     OUT_BATCH(batch, 0);
+
        /*DW10. Instruct base address */
-    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                         //Instruction Base Address
+    if (gpe_context->instruction_state.bo)
+        OUT_RELOC(batch, gpe_context->instruction_state.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION,
+                  0, BASE_ADDRESS_MODIFY);
+    else
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
     OUT_BATCH(batch, 0);
 
        /* DW12. Size limitation */
@@ -1008,6 +1029,38 @@ gen8_gpe_vfe_state(VADriverContextP ctx,
 
 }
 
+
+/*
+ * Emit MEDIA_CURBE_LOAD: tell the media pipeline where the CURBE (constant)
+ * data lives.  On Gen8 the CURBE is a sub-allocation inside the shared
+ * dynamic-state BO, so only a length and an offset (relative to the Dynamic
+ * State Base Address programmed in STATE_BASE_ADDRESS) are emitted -- no
+ * per-command relocation is needed.
+ */
+static void
+gen8_gpe_curbe_load(VADriverContextP ctx,
+                    struct i965_gpe_context *gpe_context,
+                    struct intel_batchbuffer *batch)
+{
+    BEGIN_BATCH(batch, 4);
+
+    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); /* DW0: opcode | (length - 2) */
+    OUT_BATCH(batch, 0);                              /* DW1: reserved */
+    OUT_BATCH(batch, gpe_context->curbe_size);        /* DW2: CURBE data length */
+    OUT_BATCH(batch, gpe_context->curbe_offset);      /* DW3: offset into dynamic state */
+
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD: point the hardware at the interface
+ * descriptor remap table (IDRT).  Like the CURBE, the IDRT is carved out of
+ * the shared dynamic-state BO on Gen8, so a length + offset pair (relative
+ * to the Dynamic State Base Address) replaces the old relocation.
+ */
+static void
+gen8_gpe_idrt(VADriverContextP ctx,
+              struct i965_gpe_context *gpe_context,
+              struct intel_batchbuffer *batch)
+{
+    BEGIN_BATCH(batch, 4);
+
+    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2)); /* DW0: opcode | (length - 2) */
+    OUT_BATCH(batch, 0);                                  /* DW1: reserved */
+    OUT_BATCH(batch, gpe_context->idrt_size);             /* DW2: descriptor data length */
+    OUT_BATCH(batch, gpe_context->idrt_offset);           /* DW3: offset into dynamic state */
+
+    ADVANCE_BATCH(batch);
+}
+
+
 void
 gen8_gpe_pipeline_setup(VADriverContextP ctx,
                         struct i965_gpe_context *gpe_context,
@@ -1018,7 +1071,131 @@ gen8_gpe_pipeline_setup(VADriverContextP ctx,
     i965_gpe_select(ctx, gpe_context, batch);
     gen8_gpe_state_base_address(ctx, gpe_context, batch);
     gen8_gpe_vfe_state(ctx, gpe_context, batch);
-    gen6_gpe_curbe_load(ctx, gpe_context, batch);
-    gen6_gpe_idrt(ctx, gpe_context, batch);
+    gen8_gpe_curbe_load(ctx, gpe_context, batch);
+    gen8_gpe_idrt(ctx, gpe_context, batch);
+}
+
+/*
+ * Allocate the per-context GPE buffers for Gen8:
+ *  - one BO for surface state + binding table;
+ *  - one shared "dynamic state" BO carved into the constant buffer (CURBE),
+ *    the interface descriptor table (IDRT) and the sampler state, each
+ *    sub-allocation aligned to 64 bytes.
+ * The caller must have set surface_state_binding_table.length, curbe_size,
+ * idrt_size and sampler_size before calling.
+ */
+void
+gen8_gpe_context_init(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    dri_bo *bo;
+    int bo_size;
+    unsigned int end_offset;
+
+    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      gpe_context->surface_state_binding_table.length,
+                      4096);
+    assert(bo);
+    gpe_context->surface_state_binding_table.bo = bo;
+
+    /* The extra 192 bytes cover the worst-case 64-byte alignment padding
+     * in front of each of the three sub-allocations below. */
+    bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
+    dri_bo_unreference(gpe_context->dynamic_state.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "dynamic state",
+                      bo_size,
+                      4096);
+    assert(bo);
+    gpe_context->dynamic_state.bo = bo;
+    gpe_context->dynamic_state.bo_size = bo_size;
+
+    end_offset = 0;
+    gpe_context->dynamic_state.end_offset = 0;
+
+    /* Constant buffer (CURBE) offset.  Advance end_offset from the ALIGNED
+     * offset so the padding bytes are accounted for. */
+    gpe_context->curbe_offset = ALIGN(end_offset, 64);
+    end_offset = gpe_context->curbe_offset + gpe_context->curbe_size;
+
+    /* Interface descriptor table offset */
+    gpe_context->idrt_offset = ALIGN(end_offset, 64);
+    end_offset = gpe_context->idrt_offset + gpe_context->idrt_size;
+
+    /* Sampler state offset */
+    gpe_context->sampler_offset = ALIGN(end_offset, 64);
+    end_offset = gpe_context->sampler_offset + gpe_context->sampler_size;
+
+    /* update the end offset of dynamic_state */
+    gpe_context->dynamic_state.end_offset = end_offset;
+}
+
+
+/*
+ * Release every GEM buffer owned by a Gen8 GPE context and clear the
+ * pointers so a repeated destroy (or a later re-init) cannot double-free
+ * or reuse stale references.
+ */
+void
+gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
+{
+    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
+    gpe_context->surface_state_binding_table.bo = NULL;
+
+    dri_bo_unreference(gpe_context->instruction_state.bo);
+    gpe_context->instruction_state.bo = NULL;
+
+    dri_bo_unreference(gpe_context->dynamic_state.bo);
+    gpe_context->dynamic_state.bo = NULL;
+
+    dri_bo_unreference(gpe_context->indirect_state.bo);
+    gpe_context->indirect_state.bo = NULL;
+}
+
+
+/*
+ * Copy all kernel binaries into a single instruction-state BO.  On Gen8 the
+ * interface descriptor's kernel start pointer is an offset from the
+ * Instruction Base Address, so each kernel records its 64-byte-aligned
+ * offset (kernel_offset) within this BO instead of owning a separate BO.
+ */
+void
+gen8_gpe_load_kernels(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context,
+                      struct i965_kernel *kernel_list,
+                      unsigned int num_kernels)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int i, kernel_size;
+    unsigned int kernel_offset, end_offset;
+    unsigned char *kernel_ptr;
+    struct i965_kernel *kernel;
+
+    assert(num_kernels <= MAX_GPE_KERNELS);
+    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
+    gpe_context->num_kernels = num_kernels;
+
+    /* Reserve up to 64 bytes of alignment padding per kernel. */
+    kernel_size = num_kernels * 64;
+    for (i = 0; i < num_kernels; i++) {
+        kernel = &gpe_context->kernels[i];
+
+        kernel_size += kernel->size;
+    }
+
+    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                  "kernel shader",
+                                  kernel_size,
+                                  0x1000);
+    if (gpe_context->instruction_state.bo == NULL) {
+        WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
+        return;
+    }
+
+    gpe_context->instruction_state.bo_size = kernel_size;
+    gpe_context->instruction_state.end_offset = 0;
+    end_offset = 0;
+
+    dri_bo_map(gpe_context->instruction_state.bo, 1);
+    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
+    for (i = 0; i < num_kernels; i++) {
+        kernel_offset = ALIGN(end_offset, 64);
+        kernel = &gpe_context->kernels[i];
+        kernel->kernel_offset = kernel_offset;
+
+        if (kernel->size) {
+            memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
+
+            /* Advance from the ALIGNED offset, not the raw running size;
+             * otherwise the next ALIGN() could land inside this kernel's
+             * bytes and the copies would overlap. */
+            end_offset = kernel_offset + kernel->size;
+        }
+    }
+
+    gpe_context->instruction_state.end_offset = end_offset;
+
+    dri_bo_unmap(gpe_context->instruction_state.bo);
}
 
index b96916d..2331152 100644 (file)
@@ -114,6 +114,29 @@ struct i965_gpe_context
 
     unsigned int num_kernels;
     struct i965_kernel kernels[MAX_GPE_KERNELS];
+
+    struct {
+        dri_bo *bo;
+        int bo_size;
+        unsigned int end_offset;
+    } instruction_state;
+
+    struct {
+        dri_bo *bo;
+    } indirect_state;
+
+    struct {
+        dri_bo *bo;
+        int bo_size;
+        unsigned int end_offset;
+    } dynamic_state;
+
+    unsigned int sampler_offset;
+    int sampler_size;
+    unsigned int idrt_offset;
+    int idrt_size;
+    unsigned int curbe_offset;
+    int curbe_size;
 };
 
 void i965_gpe_context_destroy(struct i965_gpe_context *gpe_context);
@@ -186,4 +209,14 @@ extern void gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
 void gen8_gpe_pipeline_setup(VADriverContextP ctx,
                              struct i965_gpe_context *gpe_context,
                              struct intel_batchbuffer *batch);
+
+
+void gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context);
+void gen8_gpe_context_init(VADriverContextP ctx,
+                           struct i965_gpe_context *gpe_context);
+
+void gen8_gpe_load_kernels(VADriverContextP ctx,
+                           struct i965_gpe_context *gpe_context,
+                           struct i965_kernel *kernel_list,
+                           unsigned int num_kernels);
 #endif /* _I965_GPE_UTILS_H_ */