nvenc: Support CUDA buffer pool

author Seungha Yang <seungha.yang@navercorp.com>

Fri, 30 Aug 2019 04:57:15 +0000 (13:57 +0900)

committer GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org>

Fri, 16 Oct 2020 15:56:49 +0000 (15:56 +0000)
author Seungha Yang <seungha.yang@navercorp.com>
Fri, 30 Aug 2019 04:57:15 +0000 (13:57 +0900)
committer GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org>
Fri, 16 Oct 2020 15:56:49 +0000 (15:56 +0000)
diff --git a/sys/nvcodec/gstnvbaseenc.c b/sys/nvcodec/gstnvbaseenc.c

index 0b803525d3e56d2995d8a5b2cbebf4025b55d387..2b780f1b4c408fc0a05446319fd7958727775099 100644 (file)
--- a/sys/nvcodec/gstnvbaseenc.c
+++ b/sys/nvcodec/gstnvbaseenc.c
@@ -23,6 +23,7 @@
  
  #include "gstnvbaseenc.h"
  #include "gstcudautils.h"
+#include "gstcudabufferpool.h"
  
  #include <gst/pbutils/codec-utils.h>
  
@@ -249,6 +250,8 @@ static GstCaps *gst_nv_base_enc_getcaps (GstVideoEncoder * enc,
  static gboolean gst_nv_base_enc_stop_bitstream_thread (GstNvBaseEnc * nvenc,
      gboolean force);
  static gboolean gst_nv_base_enc_drain_encoder (GstNvBaseEnc * nvenc);
+static gboolean gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc,
+    GstQuery * query);
  
  static void
  gst_nv_base_enc_class_init (GstNvBaseEncClass * klass)
@@ -276,6 +279,8 @@ gst_nv_base_enc_class_init (GstNvBaseEncClass * klass)
    videoenc_class->finish = GST_DEBUG_FUNCPTR (gst_nv_base_enc_finish);
    videoenc_class->sink_query = GST_DEBUG_FUNCPTR (gst_nv_base_enc_sink_query);
    videoenc_class->sink_event = GST_DEBUG_FUNCPTR (gst_nv_base_enc_sink_event);
+  videoenc_class->propose_allocation =
+      GST_DEBUG_FUNCPTR (gst_nv_base_enc_propose_allocation);
  
    g_object_class_install_property (gobject_class, PROP_DEVICE_ID,
        g_param_spec_uint ("cuda-device-id",
@@ -564,6 +569,129 @@ gst_nv_base_enc_sink_query (GstVideoEncoder * enc, GstQuery * query)
    return GST_VIDEO_ENCODER_CLASS (parent_class)->sink_query (enc, query);
  }
  
+#if HAVE_NVCODEC_GST_GL
+/*
+ * gst_nv_base_enc_ensure_gl_context:
+ * @nvenc: the encoder instance
+ *
+ * Makes sure nvenc->gl_context holds an OpenGL context that can be handed to
+ * a GL buffer pool. The lookup order is:
+ *   1. a context obtained by querying the sink pad
+ *      (gst_gl_query_local_gl_context),
+ *   2. the context the display already has for the current thread,
+ *   3. a newly created context sharing with nvenc->other_context.
+ * The resulting context must pass the SUPPORTED_GL_APIS / version 3.0 check.
+ *
+ * Returns: TRUE if nvenc->gl_context is set and passed the version check,
+ *   FALSE when there is no display, no context could be obtained/registered,
+ *   or the version check failed.
+ */
+static gboolean
+gst_nv_base_enc_ensure_gl_context (GstNvBaseEnc * nvenc)
+{
+  if (!nvenc->display) {
+    GST_DEBUG_OBJECT (nvenc, "No available OpenGL display");
+    return FALSE;
+  }
+
+  if (!gst_gl_query_local_gl_context (GST_ELEMENT (nvenc), GST_PAD_SINK,
+          (GstGLContext **) & nvenc->gl_context)) {
+    GST_INFO_OBJECT (nvenc, "failed to query local OpenGL context");
+
+    /* Drop whatever we held before; it is replaced right below */
+    if (nvenc->gl_context)
+      gst_object_unref (nvenc->gl_context);
+    nvenc->gl_context =
+        (GstObject *) gst_gl_display_get_gl_context_for_thread ((GstGLDisplay *)
+        nvenc->display, NULL);
+
+    if (!nvenc->gl_context
+        || !gst_gl_display_add_context ((GstGLDisplay *) nvenc->display,
+            (GstGLContext *) nvenc->gl_context)) {
+      if (nvenc->gl_context) {
+        gst_object_unref (nvenc->gl_context);
+        /* Reset the pointer: if the creation below fails without writing
+         * its out parameter we must not keep a stale pointer around, since
+         * gst_nv_base_enc_stop() unrefs gl_context whenever it is non-NULL */
+        nvenc->gl_context = NULL;
+      }
+
+      if (!gst_gl_display_create_context ((GstGLDisplay *) nvenc->display,
+              (GstGLContext *) nvenc->other_context,
+              (GstGLContext **) & nvenc->gl_context, NULL)) {
+        GST_ERROR_OBJECT (nvenc, "failed to create OpenGL context");
+        return FALSE;
+      }
+
+      /* On failure the new context stays in nvenc->gl_context and is
+       * released later by gst_nv_base_enc_stop() */
+      if (!gst_gl_display_add_context ((GstGLDisplay *) nvenc->display,
+              (GstGLContext *) nvenc->gl_context)) {
+        GST_ERROR_OBJECT (nvenc,
+            "failed to add the OpenGL context to the display");
+        return FALSE;
+      }
+    }
+  }
+
+  if (!gst_gl_context_check_gl_version ((GstGLContext *) nvenc->gl_context,
+          SUPPORTED_GL_APIS, 3, 0)) {
+    GST_WARNING_OBJECT (nvenc, "OpenGL context could not support PBO download");
+    return FALSE;
+  }
+
+  return TRUE;
+}
+#endif
+
+/*
+ * gst_nv_base_enc_propose_allocation:
+ * @enc: the video encoder
+ * @query: the allocation query sent by upstream
+ *
+ * GstVideoEncoder::propose_allocation implementation. When the caps in
+ * @query carry the GL memory feature (GL builds only) or the CUDA memory
+ * feature, a matching buffer pool sized for nvenc->items->len buffers is
+ * configured and offered to upstream together with the video meta API.
+ * For any other caps, or when no pool could be created, nothing is added
+ * here and the parent class implementation is left to answer the query.
+ *
+ * Returns: FALSE if the query has no caps, the caps cannot be parsed into a
+ *   #GstVideoInfo, or the pool rejects its configuration; otherwise the
+ *   return value of the parent class propose_allocation().
+ */
+static gboolean
+gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+  GstCaps *caps = NULL;
+  GstVideoInfo info;
+  GstBufferPool *pool = NULL;
+  GstStructure *config;
+  GstCapsFeatures *features;
+  guint size;
+  guint n_bufs;
+
+  GST_DEBUG_OBJECT (nvenc, "propose allocation");
+
+  gst_query_parse_allocation (query, &caps, NULL);
+
+  if (caps == NULL)
+    return FALSE;
+
+  if (!gst_video_info_from_caps (&info, caps)) {
+    GST_WARNING_OBJECT (nvenc, "failed to get video info");
+    return FALSE;
+  }
+
+  features = gst_caps_get_features (caps, 0);
+#if HAVE_NVCODEC_GST_GL
+  if (features && gst_caps_features_contains (features,
+          GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
+    GST_DEBUG_OBJECT (nvenc, "upsteram support GL memory");
+    if (!gst_nv_base_enc_ensure_gl_context (nvenc)) {
+      GST_WARNING_OBJECT (nvenc, "Could not get gl context");
+      goto done;
+    }
+
+    pool = gst_gl_buffer_pool_new ((GstGLContext *) nvenc->gl_context);
+  } else
+#endif
+  if (features && gst_caps_features_contains (features,
+          GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
+    GST_DEBUG_OBJECT (nvenc, "upstream support CUDA memory");
+    pool = gst_cuda_buffer_pool_new (nvenc->cuda_ctx);
+  } else {
+    GST_DEBUG_OBJECT (nvenc, "use system memory");
+    goto done;
+  }
+
+  if (G_UNLIKELY (pool == NULL)) {
+    GST_WARNING_OBJECT (nvenc, "cannot create buffer pool");
+    goto done;
+  }
+
+  size = GST_VIDEO_INFO_SIZE (&info);
+  /* Same value is used as both min and max number of buffers */
+  n_bufs = nvenc->items->len;
+
+  config = gst_buffer_pool_get_config (pool);
+  gst_buffer_pool_config_set_params (config, caps, size, n_bufs, n_bufs);
+  gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
+
+  /* Validate the configuration before touching the query, so that a pool
+   * which rejected its config is never advertised to upstream */
+  if (!gst_buffer_pool_set_config (pool, config))
+    goto error_pool_config;
+
+  /* The pool implementation may have adjusted the buffer size while
+   * applying the config (NOTE(review): not verifiable from this file for
+   * the GL/CUDA pools); advertise the size the pool actually holds */
+  config = gst_buffer_pool_get_config (pool);
+  gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
+  gst_structure_free (config);
+
+  gst_query_add_allocation_pool (query, pool, size, n_bufs, n_bufs);
+  gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
+
+  /* The query holds its own reference to the pool */
+  gst_object_unref (pool);
+
+done:
+  return GST_VIDEO_ENCODER_CLASS (parent_class)->propose_allocation (enc,
+      query);
+
+error_pool_config:
+  {
+    gst_object_unref (pool);
+    GST_WARNING_OBJECT (nvenc, "failed to set config");
+    return FALSE;
+  }
+}
+
  static gboolean
  gst_nv_base_enc_sink_event (GstVideoEncoder * enc, GstEvent * event)
  {
@@ -649,6 +777,10 @@ gst_nv_base_enc_stop (GstVideoEncoder * enc)
      gst_object_unref (nvenc->other_context);
      nvenc->other_context = NULL;
    }
+  if (nvenc->gl_context) {
+    gst_object_unref (nvenc->gl_context);
+    nvenc->gl_context = NULL;
+  }
  
    if (nvenc->items) {
      g_array_free (nvenc->items, TRUE);
@@ -1717,7 +1849,6 @@ gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state)
  
    if (!reconfigure) {
      nvenc->input_info = *info;
-    nvenc->gl_input = FALSE;
    }
  
    if (nvenc->input_state)
@@ -1727,9 +1858,7 @@ gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state)
  
    /* now allocate some buffers only on first configuration */
    if (!reconfigure) {
-#if HAVE_NVCODEC_GST_GL
      GstCapsFeatures *features;
-#endif
      guint i;
      guint input_width, input_height;
      guint n_bufs;
@@ -1744,11 +1873,17 @@ gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state)
      /* input buffers */
      g_array_set_size (nvenc->items, n_bufs);
  
-#if HAVE_NVCODEC_GST_GL
+    nvenc->mem_type = GST_NVENC_MEM_TYPE_SYSTEM;
+
      features = gst_caps_get_features (state->caps, 0);
      if (gst_caps_features_contains (features,
+            GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
+      nvenc->mem_type = GST_NVENC_MEM_TYPE_CUDA;
+    }
+#if HAVE_NVCODEC_GST_GL
+    else if (gst_caps_features_contains (features,
              GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
-      nvenc->gl_input = TRUE;
+      nvenc->mem_type = GST_NVENC_MEM_TYPE_GL;
      }
  #endif
  
@@ -2090,26 +2225,37 @@ _map_gl_input_buffer (GstGLContext * context, GstNvEncGLMapData * data)
  
  static gboolean
  gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
-    GstNvEncInputResource * resource)
+    GstNvEncInputResource * resource, gboolean use_device_memory)
  {
    gint i;
    CUdeviceptr dst = resource->cuda_pointer;
    GstVideoInfo *info = &frame->info;
    CUresult cuda_ret;
+  GstCudaMemory *cuda_mem = NULL;
  
    if (!gst_cuda_context_push (nvenc->cuda_ctx)) {
      GST_ERROR_OBJECT (nvenc, "cannot push context");
      return FALSE;
    }
  
+  if (use_device_memory) {
+    cuda_mem = (GstCudaMemory *) gst_buffer_peek_memory (frame->buffer, 0);
+  }
+
    for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) {
      CUDA_MEMCPY2D param = { 0, };
      guint dest_stride = _get_cuda_device_stride (&nvenc->input_info, i,
          resource->cuda_stride);
  
-    param.srcMemoryType = CU_MEMORYTYPE_HOST;
-    param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
-    param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
+    if (use_device_memory) {
+      param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+      param.srcDevice = cuda_mem->data + cuda_mem->offset[i];
+      param.srcPitch = cuda_mem->stride;
+    } else {
+      param.srcMemoryType = CU_MEMORYTYPE_HOST;
+      param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
+      param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
+    }
  
      param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
      param.dstDevice = dst;
@@ -2268,6 +2414,7 @@ gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame)
    GstMapFlags in_map_flags = GST_MAP_READ;
    GstNvEncFrameState *state = NULL;
    GstNvEncInputResource *resource = NULL;
+  gboolean use_device_memory = FALSE;
  
    g_assert (nvenc->encoder != NULL);
  
@@ -2292,10 +2439,27 @@ gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame)
      GST_VIDEO_CODEC_FRAME_SET_FORCE_KEYFRAME (frame);
    }
  #if HAVE_NVCODEC_GST_GL
-  if (nvenc->gl_input)
+  if (nvenc->mem_type == GST_NVENC_MEM_TYPE_GL)
      in_map_flags |= GST_MAP_GL;
  #endif
  
+  if (nvenc->mem_type == GST_NVENC_MEM_TYPE_CUDA) {
+    GstMemory *mem;
+
+    if ((mem = gst_buffer_peek_memory (frame->input_buffer, 0)) &&
+        gst_is_cuda_memory (mem)) {
+      GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
+
+      /* FIXME: enhance CUDA memory copy over multiple-gpu */
+      if (cmem->context == nvenc->cuda_ctx ||
+          gst_cuda_context_get_handle (cmem->context) ==
+          gst_cuda_context_get_handle (nvenc->cuda_ctx)) {
+        use_device_memory = TRUE;
+        in_map_flags |= GST_MAP_CUDA;
+      }
+    }
+  }
+
    if (!gst_video_frame_map (&vframe, info, frame->input_buffer, in_map_flags)) {
      goto drop;
    }
@@ -2315,7 +2479,7 @@ gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame)
    resource = state->in_buf;
  
  #if HAVE_NVCODEC_GST_GL
-  if (nvenc->gl_input) {
+  if (nvenc->mem_type == GST_NVENC_MEM_TYPE_GL) {
      GstGLMemory *gl_mem;
      GstNvEncGLMapData data;
  
@@ -2335,7 +2499,8 @@ gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame)
      }
    } else
  #endif
-  if (!gst_nv_base_enc_upload_frame (nvenc, &vframe, resource)) {
+  if (!gst_nv_base_enc_upload_frame (nvenc,
+          &vframe, resource, use_device_memory)) {
      flow = GST_FLOW_ERROR;
      goto unmap_and_drop;
    }
diff --git a/sys/nvcodec/gstnvbaseenc.h b/sys/nvcodec/gstnvbaseenc.h

index e9b2fe15e05f512b41ffc6470e3ef13e7f6c7ebd..26e240b7b7cc9fcd17a6e186bb6c2e718e192ec7 100644 (file)
--- a/sys/nvcodec/gstnvbaseenc.h
+++ b/sys/nvcodec/gstnvbaseenc.h
@@ -61,6 +61,14 @@ typedef enum {
    GST_NV_RC_MODE_VBR_HQ,
  } GstNvRCMode;
  
+/* Kind of memory the encoder's input buffers are negotiated with; stored in
+ * the encoder's mem_type field and derived from the sink caps features. */
+typedef enum
+{
+  /* Used when neither the CUDA nor the GL caps feature is present */
+  GST_NVENC_MEM_TYPE_SYSTEM = 0,
+  /* Caps carry the GL memory feature (GL-enabled builds only) */
+  GST_NVENC_MEM_TYPE_GL,
+  /* Caps carry the CUDA memory feature */
+  GST_NVENC_MEM_TYPE_CUDA,
+  /* FIXME: add support for D3D11 memory */
+} GstNvEncMemType;
+
  typedef struct {
    gboolean weighted_prediction;
    gint rc_modes;
@@ -112,7 +120,7 @@ typedef struct {
  
    GstVideoCodecState *input_state;
    volatile gint       reconfig;                   /* ATOMIC */
-  gboolean            gl_input;
+  GstNvEncMemType     mem_type;
  
    /* array of allocated input/output buffers (GstNvEncFrameState),
     * and hold the ownership of the GstNvEncFrameState. */
@@ -137,6 +145,7 @@ typedef struct {
  
    GstObject      *display;            /* GstGLDisplay */
    GstObject      *other_context;      /* GstGLContext */
+  GstObject      *gl_context;         /* GstGLContext */
  
    GstVideoInfo        input_info;     /* buffer configuration for buffers sent to NVENC */
  
diff --git a/sys/nvcodec/gstnvenc.c b/sys/nvcodec/gstnvenc.c

index 0e33a8429827e29b1d729914068e6b05a004867e..252e9ab6fb1bce1bc142c40086e4db811b8f3815 100644 (file)
--- a/sys/nvcodec/gstnvenc.c
+++ b/sys/nvcodec/gstnvenc.c
@@ -24,6 +24,8 @@
  #include "gstnvenc.h"
  #include "gstnvh264enc.h"
  #include "gstnvh265enc.h"
+#include "gstcudabufferpool.h"
+
  #include <gmodule.h>
  
  #if HAVE_NVCODEC_GST_GL
@@ -787,15 +789,21 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
        g_value_unset (interlace_modes);
        g_free (interlace_modes);
      }
-#if HAVE_NVCODEC_GST_GL
+
      {
+      GstCaps *cuda_caps = gst_caps_copy (sink_templ);
+#if HAVE_NVCODEC_GST_GL
        GstCaps *gl_caps = gst_caps_copy (sink_templ);
        gst_caps_set_features_simple (gl_caps,
            gst_caps_features_from_string (GST_CAPS_FEATURE_MEMORY_GL_MEMORY));
        gst_caps_append (sink_templ, gl_caps);
-    }
  #endif
  
+      gst_caps_set_features_simple (cuda_caps,
+          gst_caps_features_from_string (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY));
+      gst_caps_append (sink_templ, cuda_caps);
+    }
+
      name = g_strdup_printf ("video/x-%s", codec);
      src_templ = gst_caps_new_simple (name,
          "width", GST_TYPE_INT_RANGE, min_width, max_width,
author	Seungha Yang <seungha.yang@navercorp.com>
	Fri, 30 Aug 2019 04:57:15 +0000 (13:57 +0900)
committer	GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org>
	Fri, 16 Oct 2020 15:56:49 +0000 (15:56 +0000)
sys/nvcodec/gstnvbaseenc.c		patch \| blob \| history
sys/nvcodec/gstnvbaseenc.h		patch \| blob \| history
sys/nvcodec/gstnvenc.c		patch \| blob \| history