Silence a bunch of warnings
[profile/ivi/vaapi-intel-driver.git] / src / gen75_vme.c
index 7bfe258..5853757 100644 (file)
@@ -55,6 +55,7 @@
 
 #define VME_INTRA_SHADER        0
 #define VME_INTER_SHADER        1
+#define VME_BINTER_SHADER      3
 #define VME_BATCHBUFFER         2
 
 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
@@ -63,10 +64,6 @@
 
 #define VME_MSG_LENGTH         32
   
-#define                MB_SCOREBOARD_A         (1 << 0)
-#define                MB_SCOREBOARD_B         (1 << 1)
-#define                MB_SCOREBOARD_C         (1 << 2)
-
 static const uint32_t gen75_vme_intra_frame[][4] = {
 #include "shaders/vme/intra_frame_haswell.g75b"
 };
@@ -75,6 +72,10 @@ static const uint32_t gen75_vme_inter_frame[][4] = {
 #include "shaders/vme/inter_frame_haswell.g75b"
 };
 
+static const uint32_t gen75_vme_inter_bframe[][4] = {
+#include "shaders/vme/inter_bframe_haswell.g75b"
+};
+
 static const uint32_t gen75_vme_batchbuffer[][4] = {
 #include "shaders/vme/batchbuffer.g75b"
 };
@@ -101,6 +102,13 @@ static struct i965_kernel gen75_vme_kernels[] = {
         sizeof(gen75_vme_batchbuffer),
         NULL
     },
+    {
+        "VME inter BFrame",
+        VME_BINTER_SHADER,
+        gen75_vme_inter_bframe,
+        sizeof(gen75_vme_inter_bframe),
+        NULL
+    }
 };
 
 static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = {
@@ -305,7 +313,7 @@ static VAStatus gen75_vme_interface_setup(VADriverContextP ctx,
     assert(bo->virtual);
     desc = bo->virtual;
 
-    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
+    for (i = 0; i < vme_context->vme_kernel_sum; i++) {
         struct i965_kernel *kernel;
         kernel = &vme_context->gpe_context.kernels[i];
         assert(sizeof(*desc) == 32);
@@ -444,7 +452,7 @@ static void gen75_vme_state_setup_fixup(VADriverContextP ctx,
     if (encoder_context->rate_control_mode == VA_RC_CQP)
         vme_state_message[0] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
     else
-        vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY];
+        vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY];
 }
 
 static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
@@ -486,11 +494,6 @@ static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
-#define                INTRA_PRED_AVAIL_FLAG_AE        0x60
-#define                INTRA_PRED_AVAIL_FLAG_B         0x10
-#define                INTRA_PRED_AVAIL_FLAG_C         0x8
-#define                INTRA_PRED_AVAIL_FLAG_D         0x4
-#define                INTRA_PRED_AVAIL_FLAG_BCD_MASK  0x1C
 
 static void
 gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
@@ -562,155 +565,9 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
 }
 
-/* check whether the mb of (x_index, y_index) is out of bound */
-static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
-{
-       int mb_index;
-       if (x_index < 0 || x_index >= mb_width)
-               return -1;
-       if (y_index < 0 || y_index >= mb_height)
-               return -1;
-       
-       mb_index = y_index * mb_width + x_index;
-       if (mb_index < first_mb || mb_index > (first_mb + num_mb))
-               return -1;
-       return 0;
-}
-
-
-static void
-gen75_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
-                              struct encode_state *encode_state,
-                              int mb_width, int mb_height,
-                              int kernel,
-                              int transform_8x8_mode_flag,
-                              struct intel_encoder_context *encoder_context)
-{
-    struct gen6_vme_context *vme_context = encoder_context->vme_context;
-    int mb_x = 0, mb_y = 0;
-    int mb_row;
-    int s;
-    unsigned int *command_ptr;
-    int temp;
-
-
-#define                USE_SCOREBOARD          (1 << 21)
-    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
-    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
-
-    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
-        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
-        int first_mb = pSliceParameter->macroblock_address;
-        int num_mb = pSliceParameter->num_macroblocks;
-        unsigned int mb_intra_ub, score_dep;
-       int x_outer, y_outer, x_inner, y_inner;
-
-       x_outer = first_mb % mb_width;
-       y_outer = first_mb / mb_width;
-       mb_row = y_outer;
-                                
-       for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
-               x_inner = x_outer;
-               y_inner = y_outer;
-               for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
-                       mb_intra_ub = 0;
-                       score_dep = 0;
-                       if (x_inner != 0) {
-                               mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
-                               score_dep |= MB_SCOREBOARD_A; 
-                       }
-                       if (y_inner != mb_row) {
-                               mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
-                               score_dep |= MB_SCOREBOARD_B;
-                               if (x_inner != 0)
-                                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
-                               if (x_inner != (mb_width -1)) {
-                                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
-                                       score_dep |= MB_SCOREBOARD_C;
-                               }
-                       }
-                                                       
-                       *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
-                       *command_ptr++ = kernel;
-                       *command_ptr++ = USE_SCOREBOARD;
-                       /* Indirect data */
-                       *command_ptr++ = 0;
-                       /* the (X, Y) term of scoreboard */
-                       *command_ptr++ = ((y_inner << 16) | x_inner);
-                       *command_ptr++ = score_dep;
-
-                       /*inline data */
-                       *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
-                       *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
-                       x_inner -= 2;
-                       y_inner += 1;
-               }
-               x_outer += 1;
-       }
-
-       x_outer = mb_width - 2;
-       y_outer = first_mb / mb_width;
-       temp = 0;
-       for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
-               y_inner = y_outer;
-               x_inner = x_outer;
-               for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
-                       mb_intra_ub = 0;
-                       score_dep = 0;
-                       if (x_inner != 0) {
-                               mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
-                               score_dep |= MB_SCOREBOARD_A; 
-                       }
-                       if (y_inner != mb_row) {
-                               mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
-                               score_dep |= MB_SCOREBOARD_B;
-                               if (x_inner != 0)
-                                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
-                               if (x_inner != (mb_width -1)) {
-                                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
-                                       score_dep |= MB_SCOREBOARD_C;
-                               }
-                       }
-
-                       *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
-                       *command_ptr++ = kernel;
-                       *command_ptr++ = USE_SCOREBOARD;
-                       /* Indirect data */
-                       *command_ptr++ = 0;
-                       /* the (X, Y) term of scoreboard */
-                       *command_ptr++ = ((y_inner << 16) | x_inner);
-                       *command_ptr++ = score_dep;
-
-                       /*inline data */
-                       *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
-                       *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
-
-                       x_inner -= 2;
-                       y_inner += 1;
-               }
-               temp++;
-               if (temp == 2) {
-                       y_outer += 1;
-                       temp = 0;
-                       x_outer = mb_width - 2;
-               } else {
-                       x_outer++;
-               }       
-       }
-    }
-
-    *command_ptr++ = 0;
-    *command_ptr++ = MI_BATCH_BUFFER_END;
-
-    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
-}
-
 static void gen75_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct gen6_vme_context *vme_context = encoder_context->vme_context;
-    dri_bo *bo;
 
     i965_gpe_context_init(ctx, &vme_context->gpe_context);
 
@@ -735,9 +592,9 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx,
     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
-    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+    int kernel_shader;
     bool allow_hwscore = true;
     int s;
 
@@ -748,19 +605,29 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx,
                break;
        }
     }
-
+    if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
+        (pSliceParameter->slice_type == SLICE_TYPE_SI)) { /* I and SI slices use the intra kernel */
+        kernel_shader = VME_INTRA_SHADER;
+    } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
+        (pSliceParameter->slice_type == SLICE_TYPE_SP)) { /* P and SP slices use the inter kernel */
+        kernel_shader = VME_INTER_SHADER;
+    } else { /* every remaining slice type selects the B-frame kernel */
+        kernel_shader = VME_BINTER_SHADER;
+        if (!allow_hwscore) /* non-walker batchbuffer path: use the plain inter kernel instead */
+            kernel_shader = VME_INTER_SHADER;
+    }
     if (allow_hwscore)
-       gen75_vme_walker_fill_vme_batchbuffer(ctx, 
+       gen7_vme_walker_fill_vme_batchbuffer(ctx, 
                                   encode_state,
                                   width_in_mbs, height_in_mbs,
-                                  is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
+                                  kernel_shader,
                                   pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                   encoder_context);
     else
        gen75_vme_fill_vme_batchbuffer(ctx, 
                                    encode_state,
                                    width_in_mbs, height_in_mbs,
-                                   is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
+                                   kernel_shader,
                                    pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                    encoder_context);
 
@@ -955,11 +822,6 @@ gen75_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
     int i, s, j;
     unsigned int *command_ptr;
 
-#define                INTRA_PRED_AVAIL_FLAG_AE        0x60
-#define                INTRA_PRED_AVAIL_FLAG_B         0x10
-#define                INTRA_PRED_AVAIL_FLAG_C         0x8
-#define                INTRA_PRED_AVAIL_FLAG_D         0x4
-#define                INTRA_PRED_AVAIL_FLAG_BCD_MASK  0x1C
 
     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
@@ -1127,6 +989,7 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
 {
     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
     struct i965_kernel *vme_kernel_list = NULL;
+    int i965_kernel_num = 0; /* stays 0 when the profile matches none of the cases that set it */
 
     switch (encoder_context->profile) {
     case VAProfileH264Baseline:
@@ -1134,13 +997,14 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
     case VAProfileH264High:
         vme_kernel_list = gen75_vme_kernels;
         encoder_context->vme_pipeline = gen75_vme_pipeline;
-        
+               i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel); 
         break;
 
     case VAProfileMPEG2Simple:
     case VAProfileMPEG2Main:
         vme_kernel_list = gen75_vme_mpeg2_kernels;
         encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline;
+               i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel); 
 
         break;
 
@@ -1150,7 +1014,7 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
 
         break;
     }
-
+    vme_context->vme_kernel_sum = i965_kernel_num;
     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
 
     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
@@ -1164,29 +1028,12 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
     vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
 
-       vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
-       vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
-       vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
-                                                               MB_SCOREBOARD_B |
-                                                               MB_SCOREBOARD_C);
-
-       /* In VME prediction the current mb depends on the neighbour 
-        * A/B/C macroblock. So the left/up/up-right dependency should
-        * be considered.
-        */
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
-       vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
-       
-       vme_context->gpe_context.vfe_desc7.dword = 0;
+    gen7_vme_scoreboard_init(ctx, vme_context);
 
     i965_gpe_load_kernels(ctx,
                           &vme_context->gpe_context,
                           vme_kernel_list,
-                          GEN6_VME_KERNEL_NUMBER);
+                          i965_kernel_num);
     vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
     vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
     vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;