Silence a bunch of warnings
[profile/ivi/vaapi-intel-driver.git] / src / gen75_vme.c
index 4bfe085..5853757 100644 (file)
@@ -55,6 +55,7 @@
 
 #define VME_INTRA_SHADER        0
 #define VME_INTER_SHADER        1
+#define VME_BINTER_SHADER      3
 #define VME_BATCHBUFFER         2
 
 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
 
 #define VME_MSG_LENGTH         32
   
-#define                MB_SCOREBOARD_A         (1 << 0)
-#define                MB_SCOREBOARD_B         (1 << 1)
-#define                MB_SCOREBOARD_C         (1 << 2)
-
 static const uint32_t gen75_vme_intra_frame[][4] = {
 #include "shaders/vme/intra_frame_haswell.g75b"
 };
@@ -75,6 +72,10 @@ static const uint32_t gen75_vme_inter_frame[][4] = {
 #include "shaders/vme/inter_frame_haswell.g75b"
 };
 
+static const uint32_t gen75_vme_inter_bframe[][4] = {
+#include "shaders/vme/inter_bframe_haswell.g75b"
+};
+
 static const uint32_t gen75_vme_batchbuffer[][4] = {
 #include "shaders/vme/batchbuffer.g75b"
 };
@@ -101,6 +102,13 @@ static struct i965_kernel gen75_vme_kernels[] = {
         sizeof(gen75_vme_batchbuffer),
         NULL
     },
+    {
+        "VME inter BFrame",
+        VME_BINTER_SHADER,
+        gen75_vme_inter_bframe,
+        sizeof(gen75_vme_inter_bframe),
+        NULL
+    }
 };
 
 static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = {
@@ -305,7 +313,7 @@ static VAStatus gen75_vme_interface_setup(VADriverContextP ctx,
     assert(bo->virtual);
     desc = bo->virtual;
 
-    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
+    for (i = 0; i < vme_context->vme_kernel_sum; i++) {
         struct i965_kernel *kernel;
         kernel = &vme_context->gpe_context.kernels[i];
         assert(sizeof(*desc) == 32);
@@ -444,7 +452,7 @@ static void gen75_vme_state_setup_fixup(VADriverContextP ctx,
     if (encoder_context->rate_control_mode == VA_RC_CQP)
         vme_state_message[0] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
     else
-        vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY];
+        vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY];
 }
 
 static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
@@ -486,11 +494,6 @@ static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
-#define                INTRA_PRED_AVAIL_FLAG_AE        0x60
-#define                INTRA_PRED_AVAIL_FLAG_B         0x10
-#define                INTRA_PRED_AVAIL_FLAG_C         0x8
-#define                INTRA_PRED_AVAIL_FLAG_D         0x4
-#define                INTRA_PRED_AVAIL_FLAG_BCD_MASK  0x1C
 
 static void
 gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
@@ -562,82 +565,9 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
 }
 
-
-static void
-gen75_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
-                              struct encode_state *encode_state,
-                              int mb_width, int mb_height,
-                              int kernel,
-                              int transform_8x8_mode_flag,
-                              struct intel_encoder_context *encoder_context)
-{
-    struct gen6_vme_context *vme_context = encoder_context->vme_context;
-    int mb_x = 0, mb_y = 0;
-    int mb_row;
-    int i, s;
-    unsigned int *command_ptr;
-    int temp;
-
-
-#define                USE_SCOREBOARD          (1 << 21)
-    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
-    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
-
-    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
-        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
-        int slice_mb_begin = pSliceParameter->macroblock_address;
-        int slice_mb_number = pSliceParameter->num_macroblocks;
-        unsigned int mb_intra_ub, score_dep;
-       int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
-       mb_row = slice_mb_begin / mb_width; 
-        for (i = 0; i < slice_mb_number;  ) {
-            int mb_count = i + slice_mb_begin;    
-            mb_x = mb_count % mb_width;
-            mb_y = mb_count / mb_width;
-           mb_intra_ub = 0;
-           score_dep = 0;
-           if (mb_x != 0) {
-               mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
-               score_dep |= MB_SCOREBOARD_A;
-           }
-           if (mb_y != mb_row) {
-               mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
-               score_dep |= MB_SCOREBOARD_B;
-               if (mb_x != 0)
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
-               if (mb_x != (mb_width -1)) {
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
-                       score_dep |= MB_SCOREBOARD_C;
-               }
-           }
-
-               *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
-               *command_ptr++ = kernel;
-               *command_ptr++ = USE_SCOREBOARD;
-               *command_ptr++ = 0;
-               /* the (X, Y) term of scoreboard */
-               *command_ptr++ = ((mb_y << 16) | mb_x);
-               *command_ptr++ = score_dep;
-               /*inline data */
-               *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
-               *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
-
-            i += 1;
-        } 
-    }
-
-    *command_ptr++ = 0;
-    *command_ptr++ = MI_BATCH_BUFFER_END;
-
-    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
-}
-
 static void gen75_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct gen6_vme_context *vme_context = encoder_context->vme_context;
-    dri_bo *bo;
 
     i965_gpe_context_init(ctx, &vme_context->gpe_context);
 
@@ -662,9 +592,9 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx,
     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
-    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+    int kernel_shader;
     bool allow_hwscore = true;
     int s;
 
@@ -675,19 +605,29 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx,
                break;
        }
     }
-
+    if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
+        (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
+        kernel_shader = VME_INTRA_SHADER;       /* I/SI slices use the intra kernel */
+    } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
+               (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
+        kernel_shader = VME_INTER_SHADER;       /* P/SP slices use the inter kernel */
+    } else {
+        kernel_shader = VME_BINTER_SHADER;      /* every other slice type: B-frame inter kernel */
+        if (!allow_hwscore)
+            kernel_shader = VME_INTER_SHADER;   /* B kernel is not selected when the scoreboard walker is off */
+    }
     if (allow_hwscore)
-       gen75_vme_walker_fill_vme_batchbuffer(ctx, 
+       gen7_vme_walker_fill_vme_batchbuffer(ctx, 
                                   encode_state,
                                   width_in_mbs, height_in_mbs,
-                                  is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
+                                  kernel_shader,
                                   pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                   encoder_context);
     else
        gen75_vme_fill_vme_batchbuffer(ctx, 
                                    encode_state,
                                    width_in_mbs, height_in_mbs,
-                                   is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
+                                   kernel_shader,
                                    pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                    encoder_context);
 
@@ -882,11 +822,6 @@ gen75_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
     int i, s, j;
     unsigned int *command_ptr;
 
-#define                INTRA_PRED_AVAIL_FLAG_AE        0x60
-#define                INTRA_PRED_AVAIL_FLAG_B         0x10
-#define                INTRA_PRED_AVAIL_FLAG_C         0x8
-#define                INTRA_PRED_AVAIL_FLAG_D         0x4
-#define                INTRA_PRED_AVAIL_FLAG_BCD_MASK  0x1C
 
     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
@@ -1054,6 +989,7 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
 {
     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
     struct i965_kernel *vme_kernel_list = NULL;
+    int i965_kernel_num = 0;    /* stays 0 unless a profile case below sets the kernel count */
 
     switch (encoder_context->profile) {
     case VAProfileH264Baseline:
@@ -1061,13 +997,14 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
     case VAProfileH264High:
         vme_kernel_list = gen75_vme_kernels;
         encoder_context->vme_pipeline = gen75_vme_pipeline;
-        
+               i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel); 
         break;
 
     case VAProfileMPEG2Simple:
     case VAProfileMPEG2Main:
         vme_kernel_list = gen75_vme_mpeg2_kernels;
         encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline;
+               i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel); 
 
         break;
 
@@ -1077,7 +1014,7 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
 
         break;
     }
-
+    vme_context->vme_kernel_sum = i965_kernel_num;
     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
 
     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
@@ -1091,29 +1028,12 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
     vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
 
-       vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
-       vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
-       vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
-                                                               MB_SCOREBOARD_B |
-                                                               MB_SCOREBOARD_C);
-
-       /* In VME prediction the current mb depends on the neighbour 
-        * A/B/C macroblock. So the left/up/up-right dependency should
-        * be considered.
-        */
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
-       
-       vme_context->gpe_context.vfe_desc7.dword = 0;
+    gen7_vme_scoreboard_init(ctx, vme_context);
 
     i965_gpe_load_kernels(ctx,
                           &vme_context->gpe_context,
                           vme_kernel_list,
-                          GEN6_VME_KERNEL_NUMBER);
+                          i965_kernel_num);
     vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
     vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
     vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;