nvdec: Support CUDA buffer pool

author Seungha Yang <seungha.yang@navercorp.com>

Fri, 30 Aug 2019 04:55:25 +0000 (13:55 +0900)

committer GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org>

Fri, 16 Oct 2020 15:56:49 +0000 (15:56 +0000)
author Seungha Yang <seungha.yang@navercorp.com>
Fri, 30 Aug 2019 04:55:25 +0000 (13:55 +0900)
committer GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org>
Fri, 16 Oct 2020 15:56:49 +0000 (15:56 +0000)
diff --git a/sys/nvcodec/gstnvdec.c b/sys/nvcodec/gstnvdec.c

index 2d9dc86..b67bdbe 100644 (file)
--- a/sys/nvcodec/gstnvdec.c
+++ b/sys/nvcodec/gstnvdec.c
@@ -31,6 +31,7 @@
  
  #include "gstnvdec.h"
  #include "gstcudautils.h"
+#include "gstcudabufferpool.h"
  
  #include <string.h>
  
@@ -46,7 +47,7 @@ gst_nvdec_copy_device_to_gl (GstNvDec * nvdec,
  #endif
  
  static gboolean
-gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
+gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
      CUVIDPARSERDISPINFO * dispinfo, GstBuffer * output_buffer);
  
  #ifdef HAVE_NVCODEC_GST_GL
@@ -506,7 +507,6 @@ gst_nvdec_negotiate (GstVideoDecoder * decoder)
    state->caps = gst_video_info_to_caps (&state->info);
    nvdec->mem_type = GST_NVDEC_MEM_TYPE_SYSTEM;
  
-#ifdef HAVE_NVCODEC_GST_GL
    {
      GstCaps *caps;
      caps = gst_pad_get_allowed_caps (GST_VIDEO_DECODER_SRC_PAD (nvdec));
@@ -515,20 +515,35 @@ gst_nvdec_negotiate (GstVideoDecoder * decoder)
      if (!caps || gst_caps_is_any (caps)) {
        GST_DEBUG_OBJECT (nvdec,
            "cannot determine output format, use system memory");
-    } else if (nvdec->gl_display) {
+    } else {
        GstCapsFeatures *features;
        guint size = gst_caps_get_size (caps);
        guint i;
+      gboolean have_cuda = FALSE;
+      gboolean have_gl = FALSE;
  
        for (i = 0; i < size; i++) {
          features = gst_caps_get_features (caps, i);
          if (features && gst_caps_features_contains (features,
-                GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
-          GST_DEBUG_OBJECT (nvdec, "found GL memory feature, use gl");
-          nvdec->mem_type = GST_NVDEC_MEM_TYPE_GL;
+                GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
+          GST_DEBUG_OBJECT (nvdec, "found CUDA memory feature");
+          have_cuda = TRUE;
            break;
          }
+#ifdef HAVE_NVCODEC_GST_GL
+        if (nvdec->gl_display &&
+            features && gst_caps_features_contains (features,
+                GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
+          GST_DEBUG_OBJECT (nvdec, "found GL memory feature");
+          have_gl = TRUE;
+        }
+#endif
        }
+
+      if (have_cuda)
+        nvdec->mem_type = GST_NVDEC_MEM_TYPE_CUDA;
+      else if (have_gl)
+        nvdec->mem_type = GST_NVDEC_MEM_TYPE_GL;
      }
      gst_clear_caps (&caps);
    }
@@ -540,15 +555,25 @@ gst_nvdec_negotiate (GstVideoDecoder * decoder)
      nvdec->mem_type = GST_NVDEC_MEM_TYPE_SYSTEM;
    }
  
-  if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_GL) {
-    gst_caps_set_features (state->caps, 0,
-        gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_GL_MEMORY, NULL));
-    gst_caps_set_simple (state->caps, "texture-target", G_TYPE_STRING,
-        "2D", NULL);
-  } else {
-    GST_DEBUG_OBJECT (nvdec, "use system memory");
-  }
+  switch (nvdec->mem_type) {
+    case GST_NVDEC_MEM_TYPE_CUDA:
+      GST_DEBUG_OBJECT (nvdec, "use cuda memory");
+      gst_caps_set_features (state->caps, 0,
+          gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, NULL));
+      break;
+#ifdef HAVE_NVCODEC_GST_GL
+    case GST_NVDEC_MEM_TYPE_GL:
+      GST_DEBUG_OBJECT (nvdec, "use gl memory");
+      gst_caps_set_features (state->caps, 0,
+          gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_GL_MEMORY, NULL));
+      gst_caps_set_simple (state->caps, "texture-target", G_TYPE_STRING,
+          "2D", NULL);
+      break;
  #endif
+    default:
+      GST_DEBUG_OBJECT (nvdec, "use system memory");
+      break;
+  }
  
    if (nvdec->output_state)
      gst_video_codec_state_unref (nvdec->output_state);
@@ -711,7 +736,7 @@ parser_display_callback (GstNvDec * nvdec, CUVIDPARSERDISPINFO * dispinfo)
    if (!copy_ret)
  #endif
    {
-    copy_ret = gst_nvdec_copy_device_to_system (nvdec, dispinfo, output_buffer);
+    copy_ret = gst_nvdec_copy_device_to_memory (nvdec, dispinfo, output_buffer);
    }
  
    if (!copy_ret) {
@@ -1103,7 +1128,7 @@ gst_nvdec_copy_device_to_gl (GstNvDec * nvdec,
  #endif
  
  static gboolean
-gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
+gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
      CUVIDPARSERDISPINFO * dispinfo, GstBuffer * output_buffer)
  {
    CUVIDPROCPARAMS params = { 0, };
@@ -1113,16 +1138,33 @@ gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
    GstVideoFrame video_frame;
    GstVideoInfo *info = &nvdec->output_state->info;
    gint i;
+  GstMemory *mem;
+  GstCudaMemory *cuda_mem = NULL;
  
    if (!gst_cuda_context_push (nvdec->cuda_ctx)) {
      GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context");
      return FALSE;
    }
  
-  if (!gst_video_frame_map (&video_frame, info, output_buffer, GST_MAP_WRITE)) {
-    GST_ERROR_OBJECT (nvdec, "frame map failure");
-    gst_cuda_context_pop (NULL);
-    return FALSE;
+  if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_CUDA &&
+      (mem = gst_buffer_peek_memory (output_buffer, 0)) &&
+      gst_is_cuda_memory (mem)) {
+    GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
+
+    /* FIXME: enhance CUDA memory copy over multiple-gpu */
+    if (cmem->context == nvdec->cuda_ctx ||
+        gst_cuda_context_get_handle (cmem->context) ==
+        gst_cuda_context_get_handle (nvdec->cuda_ctx)) {
+      cuda_mem = cmem;
+    }
+  }
+
+  if (!cuda_mem) {
+    if (!gst_video_frame_map (&video_frame, info, output_buffer, GST_MAP_WRITE)) {
+      GST_ERROR_OBJECT (nvdec, "frame map failure");
+      gst_cuda_context_pop (NULL);
+      return FALSE;
+    }
    }
  
    params.progressive_frame = dispinfo->progressive_frame;
@@ -1139,20 +1181,27 @@ gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
  
    copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    copy_params.srcPitch = pitch;
-  copy_params.dstMemoryType = CU_MEMORYTYPE_HOST;
-  copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, 0)
-      * GST_VIDEO_INFO_COMP_PSTRIDE (info, 0);
+  copy_params.dstMemoryType =
+      cuda_mem ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
  
-  for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&video_frame); i++) {
+  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
      copy_params.srcDevice = dptr + (i * pitch * GST_VIDEO_INFO_HEIGHT (info));
-    copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
-    copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
-    copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (&video_frame, i);
+    if (cuda_mem) {
+      copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i];
+      copy_params.dstPitch = cuda_mem->stride;
+    } else {
+      copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
+      copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
+    }
+    copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i)
+        * GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
+    copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
  
      if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, nvdec->cuda_stream))) {
        GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i);
        CuvidUnmapVideoFrame (nvdec->decoder, dptr);
-      gst_video_frame_unmap (&video_frame);
+      if (!cuda_mem)
+        gst_video_frame_unmap (&video_frame);
        gst_cuda_context_pop (NULL);
        return FALSE;
      }
@@ -1160,7 +1209,8 @@ gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
  
    gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream));
  
-  gst_video_frame_unmap (&video_frame);
+  if (!cuda_mem)
+    gst_video_frame_unmap (&video_frame);
  
    if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr)))
      GST_WARNING_OBJECT (nvdec, "failed to unmap video frame");
@@ -1346,13 +1396,9 @@ gst_nvdec_ensure_gl_context (GstNvDec * nvdec)
    return TRUE;
  }
  
-#endif
-
  static gboolean
-gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
+gst_nvdec_ensure_gl_pool (GstNvDec * nvdec, GstQuery * query)
  {
-#ifdef HAVE_NVCODEC_GST_GL
-  GstNvDec *nvdec = GST_NVDEC (decoder);
    GstCaps *outcaps;
    GstBufferPool *pool = NULL;
    guint n, size, min, max;
@@ -1361,10 +1407,6 @@ gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
  
    GST_DEBUG_OBJECT (nvdec, "decide allocation");
  
-  if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_SYSTEM)
-    return GST_VIDEO_DECODER_CLASS (gst_nvdec_parent_class)->decide_allocation
-        (decoder, query);
-
    gst_query_parse_allocation (query, &outcaps, NULL);
    n = gst_query_get_n_allocation_pools (query);
    if (n > 0)
@@ -1376,6 +1418,7 @@ gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
    }
  
    if (!pool) {
+    GST_DEBUG_OBJECT (nvdec, "no downstream pool, create our pool");
      pool = gst_gl_buffer_pool_new (nvdec->gl_context);
  
      if (outcaps)
@@ -1393,8 +1436,74 @@ gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
    else
      gst_query_add_allocation_pool (query, pool, size, min, max);
    gst_object_unref (pool);
+
+  return TRUE;
+}
  #endif
  
+static gboolean
+gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query)
+{
+  GstCaps *outcaps;
+  GstBufferPool *pool = NULL;
+  guint n, size, min, max;
+  GstVideoInfo vinfo = { 0, };
+  GstStructure *config;
+
+  gst_query_parse_allocation (query, &outcaps, NULL);
+  n = gst_query_get_n_allocation_pools (query);
+  if (n > 0) {
+    gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);
+    if (pool && !GST_IS_CUDA_BUFFER_POOL (pool)) {
+      gst_object_unref (pool);
+      pool = NULL;
+    }
+  }
+
+  if (!pool) {
+    GST_DEBUG_OBJECT (nvdec, "no downstream pool, create our pool");
+    pool = gst_cuda_buffer_pool_new (nvdec->cuda_ctx);
+
+    if (outcaps)
+      gst_video_info_from_caps (&vinfo, outcaps);
+    size = (guint) vinfo.size;
+    min = max = 0;
+  }
+
+  config = gst_buffer_pool_get_config (pool);
+  gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
+  gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
+  gst_buffer_pool_set_config (pool, config);
+  if (n > 0)
+    gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
+  else
+    gst_query_add_allocation_pool (query, pool, size, min, max);
+  gst_object_unref (pool);
+
+  return TRUE;
+}
+
+static gboolean
+gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
+{
+  GstNvDec *nvdec = GST_NVDEC (decoder);
+
+  GST_DEBUG_OBJECT (nvdec, "decide allocation");
+
+  if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_SYSTEM)
+    goto done;
+
+#ifdef HAVE_NVCODEC_GST_GL
+  if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_GL) {
+    if (!gst_nvdec_ensure_gl_pool (nvdec, query))
+      return FALSE;
+  } else
+#endif
+  if (!gst_nvdec_ensure_cuda_pool (nvdec, query)) {
+    return FALSE;
+  }
+
+done:
    return GST_VIDEO_DECODER_CLASS (gst_nvdec_parent_class)->decide_allocation
        (decoder, query);
  }
diff --git a/sys/nvcodec/gstnvdec.h b/sys/nvcodec/gstnvdec.h

index 1b4d083..d9ec76e 100644 (file)
--- a/sys/nvcodec/gstnvdec.h
+++ b/sys/nvcodec/gstnvdec.h
@@ -61,7 +61,8 @@ typedef enum
  {
    GST_NVDEC_MEM_TYPE_SYSTEM = 0,
    GST_NVDEC_MEM_TYPE_GL,
-  /* FIXME: add support CUDA, D3D11 memory */
+  GST_NVDEC_MEM_TYPE_CUDA,
+  /* FIXME: add support D3D11 memory */
  } GstNvDecMemType;
  
  struct _GstNvDec
author	Seungha Yang <seungha.yang@navercorp.com>
	Fri, 30 Aug 2019 04:55:25 +0000 (13:55 +0900)
committer	GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org>
	Fri, 16 Oct 2020 15:56:49 +0000 (15:56 +0000)
sys/nvcodec/gstnvdec.c		patch \| blob \| history
sys/nvcodec/gstnvdec.h		patch \| blob \| history