Follow the spec to make BDW encoding media pipeline command support 48-bit addressing...
[platform/upstream/libva-intel-driver.git] / src / gen8_vme.c
index 64aebe7..5369b31 100644 (file)
@@ -226,11 +226,13 @@ gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
                                                    "VME batchbuffer",
                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
                                                    0x1000);
+       /*
     vme_context->vme_buffer_suface_setup(ctx,
                                          &vme_context->gpe_context,
                                          &vme_context->vme_batchbuffer,
                                          BINDING_TABLE_OFFSET(index),
                                          SURFACE_STATE_OFFSET(index));
+       */
 }
 
 static VAStatus
@@ -240,7 +242,6 @@ gen8_vme_surface_setup(VADriverContextP ctx,
                        struct intel_encoder_context *encoder_context)
 {
     struct object_surface *obj_surface;
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
 
     /*Setup surfaces state*/
     /* current picture for encoding */
@@ -252,43 +253,14 @@ gen8_vme_surface_setup(VADriverContextP ctx,
     if (!is_intra) {
        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
        int slice_type;
-       struct object_surface *slice_obj_surface;
-       int ref_surface_id;
 
        slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+       assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
 
-       if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
-            slice_obj_surface = NULL;
-            ref_surface_id = slice_param->RefPicList0[0].picture_id;
-            if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
-                slice_obj_surface = SURFACE(ref_surface_id);
-            }
-            if (slice_obj_surface && slice_obj_surface->bo) {
-                obj_surface = slice_obj_surface;
-            } else {
-                obj_surface = encode_state->reference_objects[0];
-            }
-            /* reference 0 */
-            if (obj_surface && obj_surface->bo)
-                gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
-       }
-       if (slice_type == SLICE_TYPE_B) {
-            /* reference 1 */
-            slice_obj_surface = NULL;
-            ref_surface_id = slice_param->RefPicList1[0].picture_id;
-            if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
-                slice_obj_surface = SURFACE(ref_surface_id);
-            }
-            if (slice_obj_surface && slice_obj_surface->bo) {
-                obj_surface = slice_obj_surface;
-            } else {
-                obj_surface = encode_state->reference_objects[0];
-            }
+       intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen8_vme_source_surface_state);
 
-            obj_surface = encode_state->reference_objects[1];
-            if (obj_surface && obj_surface->bo)
-                gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
-       }
+       if (slice_type == SLICE_TYPE_B)
+            intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen8_vme_source_surface_state);
     }
 
     /* VME output */
@@ -306,11 +278,14 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
     struct gen8_interface_descriptor_data *desc;   
     int i;
     dri_bo *bo;
+    unsigned char *desc_ptr;
 
-    bo = vme_context->gpe_context.idrt.bo;
+    bo = vme_context->gpe_context.dynamic_state.bo;
     dri_bo_map(bo, 1);
     assert(bo->virtual);
-    desc = bo->virtual;
+    desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+
+    desc = (struct gen8_interface_descriptor_data *)desc_ptr;
 
     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
         struct i965_kernel *kernel;
@@ -318,23 +293,18 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
         assert(sizeof(*desc) == 32);
         /*Setup the descritor table*/
         memset(desc, 0, sizeof(*desc));
-        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
+        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
         desc->desc3.sampler_count = 0; /* FIXME: */
         desc->desc3.sampler_state_pointer = 0;
         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
         desc->desc5.constant_urb_entry_read_offset = 0;
         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
-               
+
                
-        /*kernel start*/
-        dri_bo_emit_reloc(bo,  
-                          I915_GEM_DOMAIN_INSTRUCTION, 0,
-                          0,
-                          i * sizeof(*desc) + offsetof(struct gen8_interface_descriptor_data, desc0),
-                          kernel->bo);
         desc++;
     }
+
     dri_bo_unmap(bo);
 
     return VA_STATUS_SUCCESS;
@@ -364,9 +334,10 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
 
     vme_state_message[31] = mv_num;
 
-    dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
-    assert(vme_context->gpe_context.curbe.bo->virtual);
-    constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
+    dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
+    assert(vme_context->gpe_context.dynamic_state.bo->virtual);
+    constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
+                                         vme_context->gpe_context.curbe_offset;
 
     /* VME MV/Mb cost table is passed by using const buffer */
     /* Now it uses the fixed search path. So it is constructed directly
@@ -374,11 +345,158 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
      */
     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
        
-    dri_bo_unmap(vme_context->gpe_context.curbe.bo);
+    dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
 
     return VA_STATUS_SUCCESS;
 }
 
+#define                MB_SCOREBOARD_A         (1 << 0) /* set in score_dep by the walkers in this file when the macroblock is not in column 0 */
+#define                MB_SCOREBOARD_B         (1 << 1) /* set in score_dep when the macroblock is not on the first row being walked */
+#define                MB_SCOREBOARD_C         (1 << 2) /* set together with B when the macroblock is also not in the last column */
+
+/*
+ * Check whether macroblock (x_index, y_index) may be visited by a walker.
+ *
+ * The macroblock is accepted only when it lies inside the picture
+ * (0 <= x_index < mb_width and 0 <= y_index < mb_height) and its raster
+ * index, y_index * mb_width + x_index, falls inside the slice range
+ * [first_mb, first_mb + num_mb).
+ *
+ * Returns 0 when the macroblock is in bounds and -1 when it is out of
+ * bounds, so callers test for "in bounds" with !loop_in_bounds(...).
+ */
+static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
+{
+    int mb_index;
+    if (x_index < 0 || x_index >= mb_width)
+        return -1;
+    if (y_index < 0 || y_index >= mb_height)
+        return -1;
+       
+    mb_index = y_index * mb_width + x_index;
+    /* The slice holds num_mb macroblocks starting at first_mb, so the index
+     * first_mb + num_mb is already the first macroblock past its end. */
+    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
+        return -1;
+    return 0;
+}
+
+static void
+gen8wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
+                                     struct encode_state *encode_state,
+                                     int mb_width, int mb_height,
+                                     int kernel,
+                                     int transform_8x8_mode_flag,
+                                     struct intel_encoder_context *encoder_context)
+{   /*
+     * Fills vme_context->vme_batchbuffer.bo with the command stream for every
+     * slice parameter buffer in encode_state->slice_params_ext: for each visited
+     * macroblock an 8-dword CMD_MEDIA_OBJECT followed by CMD_MEDIA_STATE_FLUSH
+     * and a zero dword, and at the very end MI_BATCH_BUFFER_END plus a zero dword.
+     *
+     * Within a slice the macroblocks are visited along diagonals: every step
+     * moves 2 columns left and 1 row down until loop_in_bounds() rejects the
+     * position. Phase 1 starts diagonals on the slice's first row; phase 2
+     * starts them at the last two columns, row after row.
+     *
+     * mb_width, mb_height      picture size in macroblocks (used as bounds and row stride)
+     * kernel                   written as the second dword of each MEDIA_OBJECT
+     * transform_8x8_mode_flag  OR-ed into the last inline dword
+     * ctx                      not referenced in this function
+     *
+     * NOTE(review): no write is checked against the size of the batch buffer;
+     * presumably it is allocated for the worst case elsewhere - confirm.
+     */
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    int mb_row;
+    int s;
+    unsigned int *command_ptr;
+
+#define                USE_SCOREBOARD          (1 << 21) /* written as the third dword of every MEDIA_OBJECT below */
+    dri_bo_map(vme_context->vme_batchbuffer.bo, 1); /* second argument 1: presumably requests a writable mapping - confirm */
+    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+
+    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+       VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+       int first_mb = pSliceParameter->macroblock_address;
+       int num_mb = pSliceParameter->num_macroblocks;
+       unsigned int mb_intra_ub, score_dep;
+       int x_outer, y_outer, x_inner, y_inner;
+       int xtemp_outer = 0;
+
+       x_outer = first_mb % mb_width; /* column of the slice's first macroblock */
+       y_outer = first_mb / mb_width; /* row of the slice's first macroblock */
+       mb_row = y_outer; /* macroblocks on this row get no B/C/D flags below */
+                                
+       for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { /* phase 1: one diagonal per start column on the slice's first row, up to column mb_width - 3 */
+           x_inner = x_outer;
+           y_inner = y_outer;
+           for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { /* walk the diagonal until it leaves the picture or the slice */
+               mb_intra_ub = 0;
+               score_dep = 0;
+               if (x_inner != 0) { /* not in column 0 */
+                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                   score_dep |= MB_SCOREBOARD_A; 
+               }
+               if (y_inner != mb_row) { /* below the slice's first row */
+                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                   score_dep |= MB_SCOREBOARD_B;
+                   if (x_inner != 0)
+                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+                   if (x_inner != (mb_width -1)) { /* not in the last column */
+                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                       score_dep |= MB_SCOREBOARD_C;
+                    }
+               }
+                                                       
+               *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); /* 8 dwords are written for this command; (8 - 2) is presumably the length field - confirm */
+               *command_ptr++ = kernel;
+               *command_ptr++ = USE_SCOREBOARD;
+               /* Indirect data */
+               *command_ptr++ = 0;
+               /* the (X, Y) term of scoreboard */
+               *command_ptr++ = ((y_inner << 16) | x_inner);
+               *command_ptr++ = score_dep;
+               /*inline data */
+               *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+               *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+                *command_ptr++ = CMD_MEDIA_STATE_FLUSH; /* every MEDIA_OBJECT is followed by a flush command and a zero dword */
+                *command_ptr++ = 0;
+
+               x_inner -= 2; /* next position on the diagonal: 2 columns left, 1 row down */
+               y_inner += 1;
+           }
+           x_outer += 1;
+       }
+
+       xtemp_outer = mb_width - 2; /* phase 2 start column, clamped to 0 for pictures narrower than 2 macroblocks */
+       if (xtemp_outer < 0)
+            xtemp_outer = 0;
+       x_outer = xtemp_outer;
+       y_outer = first_mb / mb_width;
+       for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { /* phase 2: diagonals starting at columns xtemp_outer .. mb_width - 1, row after row */
+           y_inner = y_outer;
+           x_inner = x_outer;
+           for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { /* same per-macroblock emission as in phase 1 */
+               mb_intra_ub = 0;
+               score_dep = 0;
+               if (x_inner != 0) { /* not in column 0 */
+                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                   score_dep |= MB_SCOREBOARD_A; 
+               }
+               if (y_inner != mb_row) { /* below the slice's first row */
+                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                   score_dep |= MB_SCOREBOARD_B;
+                   if (x_inner != 0)
+                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+                   if (x_inner != (mb_width -1)) { /* not in the last column */
+                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                       score_dep |= MB_SCOREBOARD_C;
+                    }
+               }
+
+               *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+               *command_ptr++ = kernel;
+               *command_ptr++ = USE_SCOREBOARD;
+               /* Indirect data */
+               *command_ptr++ = 0;
+               /* the (X, Y) term of scoreboard */
+               *command_ptr++ = ((y_inner << 16) | x_inner);
+               *command_ptr++ = score_dep;
+               /*inline data */
+               *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+               *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+
+                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+                *command_ptr++ = 0;
+               x_inner -= 2;
+               y_inner += 1;
+           }
+           x_outer++;
+           if (x_outer >= mb_width) { /* past the last column: restart at xtemp_outer on the next row */
+               y_outer += 1;
+               x_outer = xtemp_outer;
+           }           
+       }
+    }
+
+    *command_ptr++ = MI_BATCH_BUFFER_END; /* terminates the stream written above */
+    *command_ptr++ = 0; /* trailing zero dword; presumably padding - confirm */
+
+    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+}
 
 static void
 gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
@@ -440,12 +558,14 @@ gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx,
             *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
             *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
 
+            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+            *command_ptr++ = 0;
             i += 1;
         } 
     }
 
-    *command_ptr++ = 0;
     *command_ptr++ = MI_BATCH_BUFFER_END;
+    *command_ptr++ = 0;
 
     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
 }
@@ -454,7 +574,7 @@ static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_conte
 {
     struct gen6_vme_context *vme_context = encoder_context->vme_context;
 
-    i965_gpe_context_init(ctx, &vme_context->gpe_context);
+    gen8_gpe_context_init(ctx, &vme_context->gpe_context);
 
     /* VME output buffer */
     dri_bo_unreference(vme_context->vme_output.bo);
@@ -490,6 +610,7 @@ static void gen8_vme_pipeline_programing(VADriverContextP ctx,
             break;
        }
     }
+
     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
        (pSliceParameter->slice_type == SLICE_TYPE_I)) {
        kernel_shader = VME_INTRA_SHADER;
@@ -502,7 +623,7 @@ static void gen8_vme_pipeline_programing(VADriverContextP ctx,
             kernel_shader = VME_INTER_SHADER;
     }
     if (allow_hwscore)
-       gen7_vme_walker_fill_vme_batchbuffer(ctx, 
+       gen8wa_vme_walker_fill_vme_batchbuffer(ctx, 
                                              encode_state,
                                              width_in_mbs, height_in_mbs,
                                              kernel_shader,
@@ -694,6 +815,135 @@ gen8_vme_mpeg2_surface_setup(VADriverContextP ctx,
 }
 
 static void
+gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
+                                           struct encode_state *encode_state,
+                                           int mb_width, int mb_height,
+                                           int kernel,
+                                           struct intel_encoder_context *encoder_context)
+{   /*
+     * Fills vme_context->vme_batchbuffer.bo with the command stream for a whole
+     * picture treated as a single range of mb_width * mb_height macroblocks
+     * starting at index 0: for each visited macroblock an 8-dword
+     * CMD_MEDIA_OBJECT followed by CMD_MEDIA_STATE_FLUSH and a zero dword, and
+     * at the very end MI_BATCH_BUFFER_END plus a zero dword.
+     *
+     * Macroblocks are visited along diagonals: every step moves 2 columns left
+     * and 1 row down until loop_in_bounds() rejects the position. Phase 1
+     * starts diagonals on row 0; phase 2 starts them at the last two columns,
+     * row after row.
+     *
+     * mb_width, mb_height  picture size in macroblocks
+     * kernel               written as the second dword of each MEDIA_OBJECT
+     * ctx, encode_state    not referenced in this function
+     *
+     * NOTE(review): no write is checked against the size of the batch buffer;
+     * presumably it is allocated for the worst case elsewhere - confirm.
+     */
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    unsigned int *command_ptr;
+
+#define                MPEG2_SCOREBOARD                (1 << 21) /* written as the third dword of every MEDIA_OBJECT below */
+
+    dri_bo_map(vme_context->vme_batchbuffer.bo, 1); /* second argument 1: presumably requests a writable mapping - confirm */
+    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+
+    {
+       unsigned int mb_intra_ub, score_dep;
+       int x_outer, y_outer, x_inner, y_inner;
+       int xtemp_outer = 0;
+       int first_mb = 0; /* the range passed to loop_in_bounds() spans the whole picture */
+       int num_mb = mb_width * mb_height;
+
+       x_outer = 0;
+       y_outer = 0;
+       
+                                
+       for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { /* phase 1: one diagonal per start column on row 0, up to column mb_width - 3 */
+           x_inner = x_outer;
+           y_inner = y_outer;
+           for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { /* walk the diagonal until it leaves the picture */
+               mb_intra_ub = 0;
+               score_dep = 0;
+               if (x_inner != 0) { /* not in column 0 */
+                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                   score_dep |= MB_SCOREBOARD_A; 
+               }
+               if (y_inner != 0) { /* not on row 0 */
+                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                   score_dep |= MB_SCOREBOARD_B;
+
+                   if (x_inner != 0)
+                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+                   if (x_inner != (mb_width -1)) { /* not in the last column */
+                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                       score_dep |= MB_SCOREBOARD_C;
+                   }
+               }
+                                                       
+               *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); /* 8 dwords are written for this command; (8 - 2) is presumably the length field - confirm */
+               *command_ptr++ = kernel;
+               *command_ptr++ = MPEG2_SCOREBOARD;
+               /* Indirect data */
+               *command_ptr++ = 0;
+               /* the (X, Y) term of scoreboard */
+               *command_ptr++ = ((y_inner << 16) | x_inner);
+               *command_ptr++ = score_dep;
+               /*inline data */
+               *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+               *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
+                *command_ptr++ = CMD_MEDIA_STATE_FLUSH; /* every MEDIA_OBJECT is followed by a flush command and a zero dword */
+                *command_ptr++ = 0;
+
+               x_inner -= 2; /* next position on the diagonal: 2 columns left, 1 row down */
+               y_inner += 1;
+           }
+           x_outer += 1;
+       }
+
+       xtemp_outer = mb_width - 2; /* phase 2 start column, clamped to 0 for pictures narrower than 2 macroblocks */
+       if (xtemp_outer < 0)
+            xtemp_outer = 0;
+       x_outer = xtemp_outer;
+       y_outer = 0;
+       for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { /* phase 2: diagonals starting at columns xtemp_outer .. mb_width - 1, row after row */
+           y_inner = y_outer;
+           x_inner = x_outer;
+           for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { /* same per-macroblock emission as in phase 1 */
+               mb_intra_ub = 0;
+               score_dep = 0;
+               if (x_inner != 0) { /* not in column 0 */
+                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                   score_dep |= MB_SCOREBOARD_A; 
+               }
+               if (y_inner != 0) { /* not on row 0 */
+                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                   score_dep |= MB_SCOREBOARD_B;
+
+                   if (x_inner != 0)
+                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+                   if (x_inner != (mb_width -1)) { /* not in the last column */
+                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                       score_dep |= MB_SCOREBOARD_C;
+                   }
+               }
+
+               *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+               *command_ptr++ = kernel;
+               *command_ptr++ = MPEG2_SCOREBOARD;
+               /* Indirect data */
+               *command_ptr++ = 0;
+               /* the (X, Y) term of scoreboard */
+               *command_ptr++ = ((y_inner << 16) | x_inner);
+               *command_ptr++ = score_dep;
+               /*inline data */
+               *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+               *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
+
+                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+                *command_ptr++ = 0;
+               x_inner -= 2;
+               y_inner += 1;
+           }
+           x_outer++;
+           if (x_outer >= mb_width) { /* past the last column: restart at xtemp_outer on the next row */
+               y_outer += 1;
+               x_outer = xtemp_outer;
+           }           
+       }
+    }
+
+    *command_ptr++ = MI_BATCH_BUFFER_END; /* terminates the stream written above */
+    *command_ptr++ = 0; /* trailing zero dword; presumably padding - confirm */
+
+    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+    return;
+}
+
+static void
 gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, 
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
@@ -750,6 +1000,8 @@ gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
 
+                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+                *command_ptr++ = 0;
                 i += 1;
             }
 
@@ -757,8 +1009,8 @@ gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
         }
     }
 
-    *command_ptr++ = 0;
     *command_ptr++ = MI_BATCH_BUFFER_END;
+    *command_ptr++ = 0;
 
     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
 }
@@ -800,7 +1052,7 @@ gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
     }
 
     if (allow_hwscore) 
-       gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
+       gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
                                                    encode_state,
                                                    width_in_mbs, height_in_mbs,
                                                    kernel_shader,
@@ -815,12 +1067,14 @@ gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
 
     intel_batchbuffer_start_atomic(batch, 0x1000);
     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
-    BEGIN_BATCH(batch, 2);
-    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
     OUT_RELOC(batch,
               vme_context->vme_batchbuffer.bo,
               I915_GEM_DOMAIN_COMMAND, 0, 
               0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
     ADVANCE_BATCH(batch);
 
     intel_batchbuffer_end_atomic(batch);       
@@ -874,7 +1128,7 @@ gen8_vme_context_destroy(void *context)
 {
     struct gen6_vme_context *vme_context = context;
 
-    i965_gpe_context_destroy(&vme_context->gpe_context);
+    gen8_gpe_context_destroy(&vme_context->gpe_context);
 
     dri_bo_unreference(vme_context->vme_output.bo);
     vme_context->vme_output.bo = NULL;
@@ -922,10 +1176,10 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
     vme_context->vme_kernel_sum = i965_kernel_num;
     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
 
-    vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
-    vme_context->gpe_context.idrt.entry_size = sizeof(struct gen8_interface_descriptor_data);
+    vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+    vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+    vme_context->gpe_context.sampler_size = 0;
 
-    vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
 
     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
     vme_context->gpe_context.vfe_state.num_urb_entries = 16;
@@ -935,7 +1189,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
 
     gen7_vme_scoreboard_init(ctx, vme_context);
 
-    i965_gpe_load_kernels(ctx,
+    gen8_gpe_load_kernels(ctx,
                           &vme_context->gpe_context,
                           vme_kernel_list,
                           i965_kernel_num);