Add CSC conversion from NV12 to RGBX for VPP on Ivy
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
index 9ef540c..a82c796 100755 (executable)
@@ -635,6 +635,7 @@ static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
 };
 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
 };
 
 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
@@ -833,6 +834,239 @@ static struct pp_module pp_modules_gen7[] = {
             NULL,
         },
     
+        gen7_pp_plx_avs_initialize,
+    },
+            
+};
+
+static const uint32_t pp_null_gen75[][4] = {
+};
+
+static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
+};
+
+static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
+};
+
+static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
+};
+
+static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
+};
+
+static const uint32_t pp_nv12_scaling_gen75[][4] = {
+#include "shaders/post_processing/gen7/avs.g75b"
+};
+
+static const uint32_t pp_nv12_avs_gen75[][4] = {
+#include "shaders/post_processing/gen7/avs.g75b"
+};
+
+static const uint32_t pp_nv12_dndi_gen75[][4] = {
+// #include "shaders/post_processing/gen7/dndi.g75b"
+};
+
+static const uint32_t pp_nv12_dn_gen75[][4] = {
+// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
+};
+static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pa.g75b"
+};
+static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pa.g75b"
+};
+static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
+#include "shaders/post_processing/gen7/pa_to_pl2.g75b"
+};
+static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
+#include "shaders/post_processing/gen7/pa_to_pl3.g75b"
+};
+static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
+};
+static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
+};
+
+static struct pp_module pp_modules_gen75[] = {
+    {
+        {
+            "NULL module (for testing)",
+            PP_NULL,
+            pp_null_gen75,
+            sizeof(pp_null_gen75),
+            NULL,
+        },
+
+        pp_null_initialize,
+    },
+
+    {
+        {
+            "NV12_NV12",
+            PP_NV12_LOAD_SAVE_N12,
+            pp_nv12_load_save_nv12_gen75,
+            sizeof(pp_nv12_load_save_nv12_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12_PL3",
+            PP_NV12_LOAD_SAVE_PL3,
+            pp_nv12_load_save_pl3_gen75,
+            sizeof(pp_nv12_load_save_pl3_gen75),
+            NULL,
+        },
+        
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_NV12",
+            PP_PL3_LOAD_SAVE_N12,
+            pp_pl3_load_save_nv12_gen75,
+            sizeof(pp_pl3_load_save_nv12_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_PL3",
+            PP_PL3_LOAD_SAVE_N12,
+            pp_pl3_load_save_pl3_gen75,
+            sizeof(pp_pl3_load_save_pl3_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 Scaling module",
+            PP_NV12_SCALING,
+            pp_nv12_scaling_gen75,
+            sizeof(pp_nv12_scaling_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 AVS module",
+            PP_NV12_AVS,
+            pp_nv12_avs_gen75,
+            sizeof(pp_nv12_avs_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 DNDI module",
+            PP_NV12_DNDI,
+            pp_nv12_dndi_gen75,
+            sizeof(pp_nv12_dndi_gen75),
+            NULL,
+        },
+
+        gen7_pp_nv12_dndi_initialize,
+    },
+
+    {
+        {
+            "NV12 DN module",
+            PP_NV12_DN,
+            pp_nv12_dn_gen75,
+            sizeof(pp_nv12_dn_gen75),
+            NULL,
+        },
+
+        gen7_pp_nv12_dn_initialize,
+    },
+    {
+        {
+            "NV12_PA module",
+            PP_NV12_LOAD_SAVE_PA,
+            pp_nv12_load_save_pa_gen75,
+            sizeof(pp_nv12_load_save_pa_gen75),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_PA module",
+            PP_PL3_LOAD_SAVE_PA,
+            pp_pl3_load_save_pa_gen75,
+            sizeof(pp_pl3_load_save_pa_gen75),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PA_NV12 module",
+            PP_PA_LOAD_SAVE_NV12,
+            pp_pa_load_save_nv12_gen75,
+            sizeof(pp_pa_load_save_nv12_gen75),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PA_PL3 module",
+            PP_PA_LOAD_SAVE_PL3,
+            pp_pa_load_save_pl3_gen75,
+            sizeof(pp_pa_load_save_pl3_gen75),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+    
+    {
+        {
+            "RGBX_NV12 module",
+            PP_RGBX_LOAD_SAVE_NV12,
+            pp_rgbx_load_save_nv12_gen75,
+            sizeof(pp_rgbx_load_save_nv12_gen75),
+            NULL,
+        },
+    
+        pp_plx_load_save_plx_initialize,
+    },
+
+    {
+        {
+            "NV12_RGBX module",
+            PP_NV12_LOAD_SAVE_RGBX,
+            pp_nv12_load_save_rgbx_gen75,
+            sizeof(pp_nv12_load_save_rgbx_gen75),
+            NULL,
+        },
+    
         pp_plx_load_save_plx_initialize,
     },
             
@@ -1298,6 +1532,7 @@ gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_cont
                           int width, int height, int pitch, int format, 
                           int index, int is_target)
 {
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);  
     struct gen7_surface_state *ss;
     dri_bo *ss_bo;
     unsigned int tiling;
@@ -1318,6 +1553,8 @@ gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_cont
     ss->ss2.height = height - 1;
     ss->ss3.pitch = pitch - 1;
     gen7_pp_set_surface_tiling(ss, tiling);
+    if (IS_HASWELL(i965->intel.device_id))
+        gen7_render_set_surface_scs(ss);
     dri_bo_emit_reloc(ss_bo,
                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
                       surf_bo_offset,
@@ -1498,6 +1735,10 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc
                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
+    int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
+                              fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
+                              fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
+                              fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
 
     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
         obj_surface = SURFACE(surface->id);
@@ -1514,7 +1755,10 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc
                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
 
             pitch[0] = obj_surface->width * 2;
-        }
+        } else if (rgbx_format) {
+           if (is_target)
+                width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
+       }
 
         width[1] = obj_surface->cb_cr_width;
         height[1] = obj_surface->cb_cr_height;
@@ -1533,7 +1777,10 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc
         pitch[0] = obj_image->image.pitches[0];
         offset[0] = obj_image->image.offsets[0];
 
-        if (packed_yuv) {
+       if (rgbx_format) {
+           if (is_target)
+               width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
+       } else if (packed_yuv) {
             if (is_target)
                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
             else
@@ -1559,10 +1806,19 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc
         gen7_pp_set_surface_state(ctx, pp_context,
                                   bo, 0,
                                   width[0] / 4, height[0], pitch[0],
-                                  I965_SURFACEFORMAT_R8_SINT,
+                                  I965_SURFACEFORMAT_R8_UINT,
                                   base_index, 1);
-
-        if (!packed_yuv) {
+       if (rgbx_format) {
+               struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+               /* the format is MSB: X-B-G-R */
+               pp_static_parameter->grf2.save_avs_rgb_swap = 0;
+               if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
+                              (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
+                       /* It is stored as MSB: X-R-G-B */
+                       pp_static_parameter->grf2.save_avs_rgb_swap = 1;
+               }
+       }
+        if (!packed_yuv && !rgbx_format) {
             if (interleaved_uv) {
                 gen7_pp_set_surface_state(ctx, pp_context,
                                           bo, offset[1],
@@ -1886,16 +2142,18 @@ pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_con
     pp_context->pp_y_steps = pp_scaling_y_steps;
     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
 
-    pp_scaling_context->dest_x = dst_rect->x;
+    int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
+    float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
+    pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
     pp_scaling_context->dest_y = dst_rect->y;
-    pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
-    pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
-    pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
+    pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
+    pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
+    pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
 
     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
 
-    pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
+    pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
 
@@ -3909,7 +4167,8 @@ gen6_pp_object_walker(VADriverContextP ctx,
         for (x = 0; x < x_steps; x++) {
             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
                 // some common block parameter update goes here, apply to all pp functions
-                update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
+                if (IS_GEN6(i965->intel.device_id))
+                    update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
                 
                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
                 *command_ptr++ = 0;
@@ -4126,6 +4385,50 @@ i965_vpp_clear_surface(VADriverContextP ctx,
     intel_batchbuffer_end_atomic(batch);
 }
 
+VAStatus
+i965_scaling_processing(
+    VADriverContextP   ctx,
+    VASurfaceID        src_surface_id,
+    const VARectangle *src_rect,
+    VASurfaceID        dst_surface_id,
+    const VARectangle *dst_rect,
+    unsigned int       flags)
+{
+    VAStatus va_status = VA_STATUS_SUCCESS;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *src_surface_obj = SURFACE(src_surface_id);
+    struct object_surface *dst_surface_obj = SURFACE(dst_surface_id);
+    assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
+    assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
+
+    if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
+        struct i965_surface src_surface;
+        struct i965_surface dst_surface;
+
+         _i965LockMutex(&i965->pp_mutex);
+
+         src_surface.id = src_surface_id;
+         src_surface.type = I965_SURFACE_TYPE_SURFACE;
+         src_surface.flags = I965_SURFACE_FLAG_FRAME;
+         dst_surface.id = dst_surface_id;
+         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
+         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
+
+         va_status = i965_post_processing_internal(ctx, i965->pp_context,
+                                                   &src_surface,
+                                                   src_rect,
+                                                   &dst_surface,
+                                                   dst_rect,
+                                                   PP_NV12_AVS,
+                                                   NULL);
+
+         _i965UnlockMutex(&i965->pp_mutex);
+    }
+
+    return va_status;
+}
+
 VASurfaceID
 i965_post_processing(
     VADriverContextP   ctx,
@@ -4556,8 +4859,11 @@ i965_post_processing_context_init(VADriverContextP ctx,
     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
+    assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
 
-    if (IS_GEN7(i965->intel.device_id))
+    if (IS_HASWELL(i965->intel.device_id))
+        memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
+    else if (IS_GEN7(i965->intel.device_id))
         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
     else if (IS_GEN6(i965->intel.device_id))
         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
@@ -4625,7 +4931,7 @@ static const int proc_frame_to_pp_frame[3] = {
     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
 };
 
-static void 
+void 
 i965_proc_picture(VADriverContextP ctx, 
                   VAProfile profile, 
                   union codec_state *codec_state,
@@ -4842,7 +5148,7 @@ i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
 
     proc_context->base.destroy = i965_proc_context_destroy;
     proc_context->base.run = i965_proc_picture;
-    proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+    proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
 
     return (struct hw_context *)proc_context;