nvcodec: Add CUDA specific memory and bufferpool

author Seungha Yang <seungha.yang@navercorp.com>

Mon, 19 Aug 2019 09:02:56 +0000 (18:02 +0900)

committer GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org>

Fri, 16 Oct 2020 15:56:49 +0000 (15:56 +0000)
author Seungha Yang <seungha.yang@navercorp.com>
Mon, 19 Aug 2019 09:02:56 +0000 (18:02 +0900)
committer GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org>
Fri, 16 Oct 2020 15:56:49 +0000 (15:56 +0000)
diff --git a/sys/nvcodec/gstcudabufferpool.c b/sys/nvcodec/gstcudabufferpool.c

new file mode 100644 (file)

index 0000000..a546268
--- /dev/null
+++ b/sys/nvcodec/gstcudabufferpool.c
@@ -0,0 +1,259 @@
+/* GStreamer
+ * Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstcudabufferpool.h"
+#include "gstcudacontext.h"
+#include "gstcudamemory.h"
+
+GST_DEBUG_CATEGORY_STATIC (gst_cuda_buffer_pool_debug);
+#define GST_CAT_DEFAULT gst_cuda_buffer_pool_debug
+
+struct _GstCudaBufferPoolPrivate
+{
+  GstCudaContext *context;      /* reference taken in gst_cuda_buffer_pool_new(), dropped in dispose */
+  GstAllocator *allocator;      /* CUDA allocator taken from the config or created in set_config() */
+  GstVideoInfo info;            /* NOTE(review): not referenced in this file, params.info is used instead */
+  gboolean add_videometa;       /* config carries GST_BUFFER_POOL_OPTION_VIDEO_META */
+  gboolean need_alignment;      /* config carries GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT */
+  GstCudaAllocationParams params;       /* allocation params plus the video info parsed from the caps */
+};
+
+#define gst_cuda_buffer_pool_parent_class parent_class
+G_DEFINE_TYPE_WITH_PRIVATE (GstCudaBufferPool, gst_cuda_buffer_pool,
+    GST_TYPE_BUFFER_POOL);
+
+/* GstBufferPool::get_options vfunc: returns the NULL-terminated list of
+ * buffer pool options this pool understands. */
+static const gchar **
+gst_cuda_buffer_pool_get_options (GstBufferPool * pool)
+{
+  static const gchar *supported_options[] = {
+    GST_BUFFER_POOL_OPTION_VIDEO_META,
+    GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT,
+    NULL
+  };
+
+  return supported_options;
+}
+
+/* GstBufferPool::set_config vfunc.
+ *
+ * Parses caps, allocator and options out of @config, stores the resulting
+ * video info / allocation params in the private struct, fixes up alignment
+ * and buffer size in @config and finally chains up to the parent class.
+ *
+ * Returns: FALSE if the config is invalid (no caps, unparsable caps, a
+ * non-CUDA allocator, allocator creation failure or alignment failure),
+ * otherwise the result of the parent class set_config.
+ */
+static gboolean
+gst_cuda_buffer_pool_set_config (GstBufferPool * pool, GstStructure * config)
+{
+  GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool);
+  GstCudaBufferPoolPrivate *priv = cuda_pool->priv;
+  GstCaps *caps = NULL;
+  guint size, min_buffers, max_buffers;
+  guint max_align, n;
+  GstAllocator *allocator = NULL;
+  /* GstCudaAllocationParams starts with a GstAllocationParams member, so the
+   * generic part can be filled in through this pointer */
+  GstAllocationParams *params = (GstAllocationParams *) & priv->params;
+  GstVideoInfo *info = &priv->params.info;
+
+  if (!gst_buffer_pool_config_get_params (config, &caps, &size, &min_buffers,
+          &max_buffers))
+    goto wrong_config;
+
+  if (caps == NULL)
+    goto no_caps;
+
+  if (!gst_buffer_pool_config_get_allocator (config, &allocator, params))
+    goto wrong_config;
+
+  /* now parse the caps from the config */
+  if (!gst_video_info_from_caps (info, caps))
+    goto wrong_caps;
+
+  GST_LOG_OBJECT (pool, "%dx%d, caps %" GST_PTR_FORMAT,
+      GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), caps);
+
+  gst_clear_object (&priv->allocator);
+
+  if (allocator) {
+    /* only allocators of our own type can produce GstCudaMemory */
+    if (!GST_IS_CUDA_ALLOCATOR (allocator)) {
+      goto wrong_allocator;
+    } else {
+      priv->allocator = gst_object_ref (allocator);
+    }
+  } else {
+    /* no allocator in the config, create one bound to our context */
+    allocator = priv->allocator = gst_cuda_allocator_new (priv->context);
+    if (G_UNLIKELY (priv->allocator == NULL))
+      goto no_allocator;
+  }
+
+  priv->add_videometa = gst_buffer_pool_config_has_option (config,
+      GST_BUFFER_POOL_OPTION_VIDEO_META);
+
+  priv->need_alignment = gst_buffer_pool_config_has_option (config,
+      GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
+
+  max_align = params->align;
+
+  /* do memory align */
+  if (priv->need_alignment && priv->add_videometa) {
+    GstVideoAlignment valign;
+
+    /* The option can be present without any alignment fields in the config;
+     * start from a zeroed alignment so that a failing getter never leaves
+     * uninitialized stack data in valign */
+    gst_video_alignment_reset (&valign);
+    gst_buffer_pool_config_get_video_alignment (config, &valign);
+
+    /* combine all per-plane stride alignment masks with the allocator one ... */
+    for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
+      max_align |= valign.stride_align[n];
+
+    /* ... and apply the combined mask to every plane */
+    for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
+      valign.stride_align[n] = max_align;
+
+    if (!gst_video_info_align (info, &valign))
+      goto failed_to_align;
+
+    gst_buffer_pool_config_set_video_alignment (config, &valign);
+  }
+
+  if (params->align < max_align) {
+    GST_WARNING_OBJECT (pool, "allocation params alignment %u is smaller "
+        "than the max specified video stride alignment %u, fixing",
+        (guint) params->align, max_align);
+
+    params->align = max_align;
+    gst_buffer_pool_config_set_allocator (config, allocator, params);
+  }
+
+  /* the buffer size advertised to the base class is the (aligned) frame size */
+  gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (info),
+      min_buffers, max_buffers);
+
+  return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config);
+
+  /* ERRORS */
+wrong_config:
+  {
+    GST_WARNING_OBJECT (pool, "invalid config");
+    return FALSE;
+  }
+no_caps:
+  {
+    GST_WARNING_OBJECT (pool, "no caps in config");
+    return FALSE;
+  }
+wrong_caps:
+  {
+    GST_WARNING_OBJECT (pool,
+        "failed getting geometry from caps %" GST_PTR_FORMAT, caps);
+    return FALSE;
+  }
+no_allocator:
+  {
+    GST_WARNING_OBJECT (pool, "Could not create new CUDA allocator");
+    return FALSE;
+  }
+wrong_allocator:
+  {
+    GST_WARNING_OBJECT (pool, "Incorrect allocator type for this pool");
+    return FALSE;
+  }
+failed_to_align:
+  {
+    GST_WARNING_OBJECT (pool, "Failed to align");
+    return FALSE;
+  }
+}
+
+/* GstBufferPool::alloc_buffer vfunc: creates one buffer backed by a single
+ * CUDA memory and, when requested by the config, attaches a GstVideoMeta
+ * describing the layout stored in the pool's video info. */
+static GstFlowReturn
+gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
+    GstBufferPoolAcquireParams * params)
+{
+  GstCudaBufferPoolPrivate *priv = GST_CUDA_BUFFER_POOL_CAST (pool)->priv;
+  GstVideoInfo *vinfo = &priv->params.info;
+  GstBuffer *outbuf = gst_buffer_new ();
+  GstMemory *cuda_mem;
+
+  cuda_mem = gst_cuda_allocator_alloc (GST_ALLOCATOR_CAST (priv->allocator),
+      GST_VIDEO_INFO_SIZE (vinfo), &priv->params);
+  if (!cuda_mem) {
+    gst_buffer_unref (outbuf);
+    GST_WARNING_OBJECT (pool, "Cannot create CUDA memory");
+    return GST_FLOW_ERROR;
+  }
+
+  gst_buffer_append_memory (outbuf, cuda_mem);
+
+  if (priv->add_videometa) {
+    GST_DEBUG_OBJECT (pool, "adding GstVideoMeta");
+    gst_buffer_add_video_meta_full (outbuf, GST_VIDEO_FRAME_FLAG_NONE,
+        GST_VIDEO_INFO_FORMAT (vinfo),
+        GST_VIDEO_INFO_WIDTH (vinfo), GST_VIDEO_INFO_HEIGHT (vinfo),
+        GST_VIDEO_INFO_N_PLANES (vinfo), vinfo->offset, vinfo->stride);
+  }
+
+  *buffer = outbuf;
+  return GST_FLOW_OK;
+}
+
+/* Creates a new buffer pool that allocates CUDA memory on @context.
+ *
+ * @context: a valid #GstCudaContext; the pool takes its own reference
+ *
+ * Returns: a new, non-floating #GstBufferPool, or NULL if @context is not a
+ * GstCudaContext.
+ */
+GstBufferPool *
+gst_cuda_buffer_pool_new (GstCudaContext * context)
+{
+  GstCudaBufferPool *pool;
+
+  /* same argument check as gst_cuda_allocator_new(); without it a NULL
+   * context would only blow up later inside gst_object_ref() */
+  g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
+
+  pool = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL);
+  gst_object_ref_sink (pool);
+
+  pool->priv->context = gst_object_ref (context);
+
+  GST_LOG_OBJECT (pool, "new CUDA buffer pool %p", pool);
+
+  return GST_BUFFER_POOL_CAST (pool);
+}
+
+/* GObject::dispose: releases the allocator and context references held in
+ * the private struct, then chains up. */
+static void
+gst_cuda_buffer_pool_dispose (GObject * object)
+{
+  GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (object);
+
+  GST_LOG_OBJECT (self, "finalize CUDA buffer pool %p", self);
+
+  gst_clear_object (&self->priv->allocator);
+  gst_clear_object (&self->priv->context);
+
+  G_OBJECT_CLASS (parent_class)->dispose (object);
+}
+
+
+/* Class initializer: installs the GObject and GstBufferPool virtual methods
+ * and registers the "cudabufferpool" debug category. */
+static void
+gst_cuda_buffer_pool_class_init (GstCudaBufferPoolClass * klass)
+{
+  GObjectClass *object_class = (GObjectClass *) klass;
+  GstBufferPoolClass *pool_class = (GstBufferPoolClass *) klass;
+
+  object_class->dispose = gst_cuda_buffer_pool_dispose;
+
+  pool_class->get_options = gst_cuda_buffer_pool_get_options;
+  pool_class->set_config = gst_cuda_buffer_pool_set_config;
+  pool_class->alloc_buffer = gst_cuda_buffer_pool_alloc;
+
+  GST_DEBUG_CATEGORY_INIT (gst_cuda_buffer_pool_debug, "cudabufferpool", 0,
+      "CUDA Buffer Pool");
+}
+
+/* Instance initializer: caches the pointer to the instance-private data. */
+static void
+gst_cuda_buffer_pool_init (GstCudaBufferPool * pool)
+{
+  GstCudaBufferPoolPrivate *priv =
+      gst_cuda_buffer_pool_get_instance_private (pool);
+
+  pool->priv = priv;
+}
diff --git a/sys/nvcodec/gstcudabufferpool.h b/sys/nvcodec/gstcudabufferpool.h

new file mode 100644 (file)

index 0000000..8e1be07
--- /dev/null
+++ b/sys/nvcodec/gstcudabufferpool.h
@@ -0,0 +1,66 @@
+/* GStreamer
+ * Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_CUDA_BUFFER_POOL_H__
+#define __GST_CUDA_BUFFER_POOL_H__
+
+#include <gst/video/gstvideometa.h>
+#include <gst/video/gstvideopool.h>
+
+#include "gstcudamemory.h"
+
+G_BEGIN_DECLS
+
+#define GST_TYPE_CUDA_BUFFER_POOL             (gst_cuda_buffer_pool_get_type ())
+#define GST_CUDA_BUFFER_POOL(obj)             (G_TYPE_CHECK_INSTANCE_CAST ((obj),GST_TYPE_CUDA_BUFFER_POOL,GstCudaBufferPool))
+#define GST_CUDA_BUFFER_POOL_CLASS(klass)     (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_CUDA_BUFFER_POOL,GstCudaBufferPoolClass))
+#define GST_CUDA_BUFFER_POOL_GET_CLASS(obj)   (G_TYPE_INSTANCE_GET_CLASS((obj),  GST_TYPE_CUDA_BUFFER_POOL,GstCudaBufferPoolClass))
+#define GST_IS_CUDA_BUFFER_POOL(obj)          (G_TYPE_CHECK_INSTANCE_TYPE ((obj),GST_TYPE_CUDA_BUFFER_POOL))
+#define GST_IS_CUDA_BUFFER_POOL_CLASS(klass)  (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_CUDA_BUFFER_POOL))
+#define GST_CUDA_BUFFER_POOL_CAST(obj)        ((GstCudaBufferPool*)(obj))
+
+typedef struct _GstCudaBufferPool GstCudaBufferPool;
+typedef struct _GstCudaBufferPoolClass GstCudaBufferPoolClass;
+typedef struct _GstCudaBufferPoolPrivate GstCudaBufferPoolPrivate;
+
+/*
+ * GstCudaBufferPool: GstBufferPool subclass whose buffers carry CUDA memory
+ */
+struct _GstCudaBufferPool
+{
+  GstBufferPool parent;
+
+  GstCudaBufferPoolPrivate *priv;       /* instance-private data, assigned in the instance init function */
+};
+
+/*
+ * GstCudaBufferPoolClass: class structure, adds no members to the parent class
+ */
+struct _GstCudaBufferPoolClass
+{
+  GstBufferPoolClass parent_class;
+};
+
+GType gst_cuda_buffer_pool_get_type (void);
+
+GstBufferPool * gst_cuda_buffer_pool_new (GstCudaContext * context);
+
+G_END_DECLS
+
+#endif /* __GST_CUDA_BUFFER_POOL_H__ */
diff --git a/sys/nvcodec/gstcudaloader.c b/sys/nvcodec/gstcudaloader.c

index 965717a..01dc0d7 100644 (file)
--- a/sys/nvcodec/gstcudaloader.c
+++ b/sys/nvcodec/gstcudaloader.c
@@ -69,10 +69,14 @@ typedef struct _GstNvCodecCudaVTable
      CUresult (CUDAAPI * CuMemAlloc) (CUdeviceptr * dptr, unsigned int bytesize);
      CUresult (CUDAAPI * CuMemAllocPitch) (CUdeviceptr * dptr, size_t * pPitch,
        size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
+    CUresult (CUDAAPI * CuMemAllocHost) (void **pp, unsigned int bytesize);
      CUresult (CUDAAPI * CuMemcpy2D) (const CUDA_MEMCPY2D * pCopy);
      CUresult (CUDAAPI * CuMemcpy2DAsync) (const CUDA_MEMCPY2D * pCopy,
        CUstream hStream);
+
      CUresult (CUDAAPI * CuMemFree) (CUdeviceptr dptr);
+    CUresult (CUDAAPI * CuMemFreeHost) (void *p);
+
      CUresult (CUDAAPI * CuStreamCreate) (CUstream * phStream,
        unsigned int Flags);
      CUresult (CUDAAPI * CuStreamDestroy) (CUstream hStream);
@@ -136,9 +140,12 @@ gst_cuda_load_library (void)
  
    LOAD_SYMBOL (cuMemAlloc, CuMemAlloc);
    LOAD_SYMBOL (cuMemAllocPitch, CuMemAllocPitch);
+  LOAD_SYMBOL (cuMemAllocHost, CuMemAllocHost);
    LOAD_SYMBOL (cuMemcpy2D, CuMemcpy2D);
    LOAD_SYMBOL (cuMemcpy2DAsync, CuMemcpy2DAsync);
+
    LOAD_SYMBOL (cuMemFree, CuMemFree);
+  LOAD_SYMBOL (cuMemFreeHost, CuMemFreeHost);
  
    LOAD_SYMBOL (cuStreamCreate, CuStreamCreate);
    LOAD_SYMBOL (cuStreamDestroy, CuStreamDestroy);
@@ -286,6 +293,14 @@ CuMemAllocPitch (CUdeviceptr * dptr, size_t * pPitch, size_t WidthInBytes,
  }
  
  CUresult CUDAAPI
+CuMemAllocHost (void **pp, unsigned int bytesize)
+{
+  g_assert (gst_cuda_vtable.CuMemAllocHost != NULL);
+  /* forward to the cuMemAllocHost symbol stored in the vtable by gst_cuda_load_library() */
+  /* NOTE(review): the driver API documents bytesize as size_t - confirm unsigned int is intended */
+  return gst_cuda_vtable.CuMemAllocHost (pp, bytesize);
+}
+
+CUresult CUDAAPI
  CuMemcpy2D (const CUDA_MEMCPY2D * pCopy)
  {
    g_assert (gst_cuda_vtable.CuMemcpy2D != NULL);
@@ -310,6 +325,14 @@ CuMemFree (CUdeviceptr dptr)
  }
  
  CUresult CUDAAPI
+CuMemFreeHost (void *p)
+{
+  g_assert (gst_cuda_vtable.CuMemFreeHost != NULL);
+  /* forward to the cuMemFreeHost symbol stored in the vtable by gst_cuda_load_library() */
+  return gst_cuda_vtable.CuMemFreeHost (p);
+}
+
+CUresult CUDAAPI
  CuStreamCreate (CUstream * phStream, unsigned int Flags)
  {
    g_assert (gst_cuda_vtable.CuStreamCreate != NULL);
diff --git a/sys/nvcodec/gstcudaloader.h b/sys/nvcodec/gstcudaloader.h

index 39cacbb..ba0e372 100644 (file)
--- a/sys/nvcodec/gstcudaloader.h
+++ b/sys/nvcodec/gstcudaloader.h
@@ -91,6 +91,10 @@ CUresult CUDAAPI CuMemAllocPitch    (CUdeviceptr * dptr,
                                       unsigned int ElementSizeBytes);
  
  G_GNUC_INTERNAL
+CUresult CUDAAPI CuMemAllocHost     (void **pp,
+                                     unsigned int bytesize);
+
+G_GNUC_INTERNAL
  CUresult CUDAAPI CuMemcpy2D         (const CUDA_MEMCPY2D * pCopy);
  
  G_GNUC_INTERNAL
@@ -100,6 +104,9 @@ G_GNUC_INTERNAL
  CUresult CUDAAPI CuMemFree          (CUdeviceptr dptr);
  
  G_GNUC_INTERNAL
+CUresult CUDAAPI CuMemFreeHost      (void *p);
+
+G_GNUC_INTERNAL
  CUresult CUDAAPI CuStreamCreate     (CUstream *phStream,
                                       unsigned int Flags);
  
diff --git a/sys/nvcodec/gstcudamemory.c b/sys/nvcodec/gstcudamemory.c

new file mode 100644 (file)

index 0000000..b6b5acb
--- /dev/null
+++ b/sys/nvcodec/gstcudamemory.c
@@ -0,0 +1,485 @@
+/* GStreamer
+ * Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstcudamemory.h"
+#include "gstcudautils.h"
+
+#include <string.h>
+
+GST_DEBUG_CATEGORY_STATIC (cudaallocator_debug);
+#define GST_CAT_DEFAULT cudaallocator_debug
+GST_DEBUG_CATEGORY_STATIC (GST_CAT_MEMORY);
+
+#define gst_cuda_allocator_parent_class parent_class
+G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR);
+
+static void gst_cuda_allocator_dispose (GObject * object);
+static void gst_cuda_allocator_free (GstAllocator * allocator,
+    GstMemory * memory);
+
+static gpointer cuda_mem_map (GstCudaMemory * mem, gsize maxsize,
+    GstMapFlags flags);
+static void cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info);
+static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size);
+
+static GstMemory *
+gst_cuda_allocator_dummy_alloc (GstAllocator * allocator, gsize size,
+    GstAllocationParams * params)
+{
+  g_return_val_if_reached (NULL);       /* installed as the class alloc vfunc; real allocation goes through gst_cuda_allocator_alloc() */
+}
+
+/* Class initializer: hooks up dispose and the alloc/free vfuncs, and sets up
+ * the debug categories used by this file. */
+static void
+gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass)
+{
+  GstAllocatorClass *alloc_class = GST_ALLOCATOR_CLASS (klass);
+  GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+  object_class->dispose = gst_cuda_allocator_dispose;
+
+  alloc_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc);
+  alloc_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free);
+
+  GST_DEBUG_CATEGORY_INIT (cudaallocator_debug, "cudaallocator", 0,
+      "CUDA Allocator");
+  GST_DEBUG_CATEGORY_GET (GST_CAT_MEMORY, "GST_MEMORY");
+}
+
+/* Instance initializer: sets the memory type name, installs the map / unmap /
+ * copy implementations and flags the allocator as custom-alloc. */
+static void
+gst_cuda_allocator_init (GstCudaAllocator * allocator)
+{
+  GstAllocator *base = GST_ALLOCATOR_CAST (allocator);
+
+  GST_DEBUG_OBJECT (allocator, "init");
+
+  base->mem_type = GST_CUDA_MEMORY_TYPE_NAME;
+
+  base->mem_map = (GstMemoryMapFunction) cuda_mem_map;
+  base->mem_unmap_full = (GstMemoryUnmapFullFunction) cuda_mem_unmap_full;
+  base->mem_copy = (GstMemoryCopyFunction) cuda_mem_copy;
+
+  GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC);
+}
+
+/* GObject::dispose: drops the reference to the CUDA context and chains up. */
+static void
+gst_cuda_allocator_dispose (GObject * object)
+{
+  GstCudaAllocator *cuda_alloc = GST_CUDA_ALLOCATOR_CAST (object);
+
+  GST_DEBUG_OBJECT (cuda_alloc, "dispose");
+
+  gst_clear_object (&cuda_alloc->context);
+
+  G_OBJECT_CLASS (parent_class)->dispose (object);
+}
+
+/* Allocates one pitched CUDA device allocation large enough to hold all
+ * planes described by @params->info and wraps it in a new GstCudaMemory.
+ *
+ * @allocator: a #GstCudaAllocator
+ * @size: size reported for the resulting #GstMemory
+ * @params: allocation parameters including the video info of the frame
+ *
+ * Returns: a new #GstMemory, or NULL on invalid arguments, if the CUDA
+ * context cannot be pushed, or if the device allocation fails.
+ */
+GstMemory *
+gst_cuda_allocator_alloc (GstAllocator * allocator, gsize size,
+    GstCudaAllocationParams * params)
+{
+  GstCudaAllocator *self;
+  gsize maxsize, align, offset;
+  GstMemoryFlags flags;
+  CUdeviceptr data;
+  gboolean ret = FALSE;
+  GstCudaMemory *mem;
+  GstVideoInfo *info;
+  gint i;
+  guint width, height;
+  gsize stride, plane_offset;
+
+  /* public entry point: validate before dereferencing anything */
+  g_return_val_if_fail (GST_IS_CUDA_ALLOCATOR (allocator), NULL);
+  g_return_val_if_fail (params != NULL, NULL);
+
+  self = GST_CUDA_ALLOCATOR_CAST (allocator);
+  maxsize = size + params->parent.prefix + params->parent.padding;
+  align = params->parent.align;
+  offset = params->parent.prefix;
+  flags = params->parent.flags;
+  info = &params->info;
+
+  if (!gst_cuda_context_push (self->context))
+    return NULL;
+
+  /* ensure configured alignment */
+  align |= gst_memory_alignment;
+  /* allocate more to compensate for alignment */
+  maxsize += align;
+
+  GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, self, "allocate new cuda memory");
+
+  /* one allocation for the whole frame: row width in bytes is taken from the
+   * first component, the height is the sum of all plane heights */
+  width = GST_VIDEO_INFO_COMP_WIDTH (info, 0) *
+      GST_VIDEO_INFO_COMP_PSTRIDE (info, 0);
+  height = 0;
+  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++)
+    height += GST_VIDEO_INFO_COMP_HEIGHT (info, i);
+
+  ret = gst_cuda_result (CuMemAllocPitch (&data, &stride, width, height, 16));
+  gst_cuda_context_pop (NULL);
+
+  if (G_UNLIKELY (!ret)) {
+    GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self, "CUDA allocation failure");
+    return NULL;
+  }
+
+  mem = g_new0 (GstCudaMemory, 1);
+  g_mutex_init (&mem->lock);
+  mem->data = data;
+  mem->alloc_params = *params;
+  mem->stride = stride;
+
+  /* planes are stacked vertically, all sharing the pitch returned by CUDA */
+  plane_offset = 0;
+  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
+    mem->offset[i] = plane_offset;
+    plane_offset += stride * GST_VIDEO_INFO_COMP_HEIGHT (info, i);
+  }
+
+  mem->context = gst_object_ref (self->context);
+
+  gst_memory_init (GST_MEMORY_CAST (mem),
+      flags, GST_ALLOCATOR_CAST (self), NULL, maxsize, align, offset, size);
+
+  return GST_MEMORY_CAST (mem);
+}
+
+/* GstAllocator::free vfunc: releases the device allocation and the pinned
+ * host staging buffer (if any) of @memory, then frees the GstCudaMemory
+ * struct itself. */
+static void
+gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
+{
+  GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
+  GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory);
+
+  GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, allocator, "free cuda memory");
+
+  g_mutex_clear (&mem->lock);
+
+  /* Only touch CUDA resources, and only pop, when our context was really
+   * pushed: popping after a failed push would remove whatever context the
+   * calling thread had current. */
+  if (gst_cuda_context_push (self->context)) {
+    if (mem->data)
+      gst_cuda_result (CuMemFree (mem->data));
+
+    if (mem->map_alloc_data)
+      gst_cuda_result (CuMemFreeHost (mem->map_alloc_data));
+
+    gst_cuda_context_pop (NULL);
+  } else {
+    GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, allocator,
+        "cannot push cuda context, CUDA resources are not released");
+  }
+
+  gst_object_unref (mem->context);
+
+  g_free (mem);
+}
+
+/* called with lock
+ *
+ * Copies every plane from the host staging buffer (laid out according to the
+ * video info) into the pitched device allocation, then synchronizes the
+ * default stream. Returns FALSE if there is no staging buffer or a plane copy
+ * could not be issued. */
+static gboolean
+gst_cuda_memory_upload_transfer (GstCudaMemory * mem)
+{
+  GstVideoInfo *vinfo = &mem->alloc_params.info;
+  gboolean ok = TRUE;
+  gint plane;
+
+  if (!mem->map_data) {
+    GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload");
+    return FALSE;
+  }
+
+  for (plane = 0; ok && plane < GST_VIDEO_INFO_N_PLANES (vinfo); plane++) {
+    CUDA_MEMCPY2D copy_params = { 0, };
+
+    /* source: host staging memory */
+    copy_params.srcMemoryType = CU_MEMORYTYPE_HOST;
+    copy_params.srcHost =
+        (guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (vinfo, plane);
+    copy_params.srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (vinfo, plane);
+
+    /* destination: device memory */
+    copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+    copy_params.dstDevice = mem->data + mem->offset[plane];
+    copy_params.dstPitch = mem->stride;
+
+    copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (vinfo, plane) *
+        GST_VIDEO_INFO_COMP_PSTRIDE (vinfo, plane);
+    copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (vinfo, plane);
+
+    if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, NULL))) {
+      GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", plane);
+      ok = FALSE;
+    }
+  }
+
+  gst_cuda_result (CuStreamSynchronize (NULL));
+
+  return ok;
+}
+
+/* called with lock
+ *
+ * Copies every plane from the pitched device allocation into the host
+ * staging buffer and synchronizes the default stream.
+ *
+ * On a copy failure the staging buffer is released and all staging pointers
+ * are reset, so a later map allocates a fresh one.
+ *
+ * Returns: TRUE if the staging buffer now holds the device content. */
+static gboolean
+gst_cuda_memory_download_transfer (GstCudaMemory * mem)
+{
+  gint i;
+  GstVideoInfo *info = &mem->alloc_params.info;
+  gboolean ret = TRUE;
+
+  if (!mem->map_data) {
+    GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to download");
+    return FALSE;
+  }
+
+  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
+    CUDA_MEMCPY2D param = { 0, };
+
+    param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+    param.srcDevice = mem->data + mem->offset[i];
+    param.srcPitch = mem->stride;
+
+    param.dstMemoryType = CU_MEMORYTYPE_HOST;
+    param.dstHost =
+        (guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
+    param.dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
+    param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
+        GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
+    param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
+
+    if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
+      GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
+      ret = FALSE;
+      break;
+    }
+  }
+
+  /* wait for the copies that were queued before releasing anything they
+   * may still be writing to */
+  gst_cuda_result (CuStreamSynchronize (NULL));
+
+  if (!ret) {
+    gst_cuda_result (CuMemFreeHost (mem->map_alloc_data));
+    mem->map_alloc_data = mem->map_data = mem->align_data = NULL;
+  }
+
+  return ret;
+}
+
+/* Maps the memory for host (system memory) access.
+ *
+ * Called from cuda_mem_map() with mem->lock held. Lazily allocates a pinned
+ * host staging buffer, aligns it according to the memory's alignment mask
+ * and, if the device content is newer than the staging copy, downloads it.
+ *
+ * Returns: pointer to the aligned staging buffer, or NULL on failure (in
+ * which case mem->map_data is left NULL). */
+static gpointer
+gst_cuda_memory_device_memory_map (GstCudaMemory * mem)
+{
+  GstMemory *memory = GST_MEMORY_CAST (mem);
+  gpointer data;
+  gsize aoffset;
+  gsize align = memory->align;
+
+  /* already mapped, hand out the same pointer */
+  if (mem->map_data) {
+    return mem->map_data;
+  }
+
+  GST_CAT_DEBUG (GST_CAT_MEMORY, "alloc host memory for map");
+
+  if (!mem->map_alloc_data) {
+    gsize maxsize;
+    guint8 *align_data;
+
+    /* over-allocate so that the start can be moved up to an aligned address */
+    maxsize = memory->maxsize + align;
+    if (!gst_cuda_context_push (mem->context)) {
+      GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
+
+      return NULL;
+    }
+
+    if (!gst_cuda_result (CuMemAllocHost (&data, maxsize))) {
+      GST_CAT_ERROR (GST_CAT_MEMORY, "cannot alloc host memory");
+      gst_cuda_context_pop (NULL);
+
+      return NULL;
+    }
+
+    if (!gst_cuda_context_pop (NULL)) {
+      GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
+    }
+
+    mem->map_alloc_data = data;
+    align_data = data;
+
+    /* do align (align is a bit mask, e.g. 15 for 16 byte alignment) */
+    if ((aoffset = ((guintptr) align_data & align))) {
+      aoffset = (align + 1) - aoffset;
+      align_data += aoffset;
+    }
+    mem->align_data = align_data;
+
+    /* first memory, always need download to staging */
+    GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
+  }
+
+  mem->map_data = mem->align_data;
+
+  if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD)) {
+    if (!gst_cuda_context_push (mem->context)) {
+      GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
+      /* the map failed: do not leave map_data set, otherwise the next map
+       * would return the staging buffer without ever downloading into it */
+      mem->map_data = NULL;
+
+      return NULL;
+    }
+
+    /* on failure the transfer resets map_data to NULL, which is what gets
+     * returned below */
+    gst_cuda_memory_download_transfer (mem);
+
+    if (!gst_cuda_context_pop (NULL)) {
+      GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
+    }
+  }
+
+  return mem->map_data;
+}
+
+/* GstAllocator mem_map implementation.
+ *
+ * With GST_MAP_CUDA the device pointer is returned (after uploading pending
+ * staging data); without it a host staging buffer is returned (after
+ * downloading pending device data). Write access marks the opposite side as
+ * needing a transfer.
+ *
+ * Returns: the mapped pointer, or NULL on failure. On failure the map count
+ * is restored and no CUDA context is left pushed. */
+static gpointer
+cuda_mem_map (GstCudaMemory * mem, gsize maxsize, GstMapFlags flags)
+{
+  gpointer ret = NULL;
+
+  g_mutex_lock (&mem->lock);
+  mem->map_count++;
+
+  if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
+    /* upload from staging to device memory if necessary */
+    if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
+      gboolean uploaded;
+
+      if (!gst_cuda_context_push (mem->context)) {
+        GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
+        goto map_failed;
+      }
+
+      uploaded = gst_cuda_memory_upload_transfer (mem);
+
+      /* always balance the push, also when the upload failed */
+      gst_cuda_context_pop (NULL);
+
+      if (!uploaded)
+        goto map_failed;
+    }
+
+    GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
+
+    if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
+      GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
+
+    ret = (gpointer) mem->data;
+    g_mutex_unlock (&mem->lock);
+
+    return ret;
+  }
+
+  ret = gst_cuda_memory_device_memory_map (mem);
+  if (ret == NULL)
+    goto map_failed;
+
+  if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
+    GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
+
+  GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
+
+  g_mutex_unlock (&mem->lock);
+
+  return ret;
+
+  /* ERRORS */
+map_failed:
+  {
+    /* a failed map is never unmapped, so undo the count taken above */
+    mem->map_count--;
+    g_mutex_unlock (&mem->lock);
+    return NULL;
+  }
+}
+
+/* GstAllocator mem_unmap_full implementation.
+ *
+ * Drops one map count and records which side has to be transferred next:
+ * a written CUDA map requires a download, a written host map requires an
+ * upload. The host mapping pointer is cleared once the last host map is gone. */
+static void
+cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info)
+{
+  gboolean is_cuda_map, is_write_map;
+
+  g_mutex_lock (&mem->lock);
+  mem->map_count--;
+  GST_CAT_TRACE (GST_CAT_MEMORY,
+      "unmap CUDA memory %p, map count %d, have map_data %s",
+      mem, mem->map_count, mem->map_data ? "true" : "false");
+
+  is_cuda_map = (info->flags & GST_MAP_CUDA) == GST_MAP_CUDA;
+  is_write_map = (info->flags & GST_MAP_WRITE) == GST_MAP_WRITE;
+
+  if (is_cuda_map) {
+    if (is_write_map)
+      GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
+  } else {
+    if (is_write_map)
+      GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
+
+    /* forget the host mapping when nobody holds a map anymore */
+    if (mem->map_count <= 0 && mem->map_data)
+      mem->map_data = NULL;
+  }
+
+  g_mutex_unlock (&mem->lock);
+}
+
+/* GstAllocator mem_copy implementation.
+ *
+ * Allocates a new CUDA memory with the same allocation params as @mem and
+ * copies all planes device-to-device. @offset and @size are ignored, the
+ * whole frame is always copied.
+ *
+ * Returns: the new memory, or NULL if allocation, context push or any plane
+ * copy fails. */
+static GstMemory *
+cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
+{
+  GstMemory *copy;
+  GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem);
+  GstCudaMemory *dst_mem;
+  GstCudaContext *ctx = GST_CUDA_ALLOCATOR_CAST (mem->allocator)->context;
+  gint i;
+  GstVideoInfo *info;
+
+  /* offset and size are ignored */
+  copy = gst_cuda_allocator_alloc (mem->allocator, mem->size,
+      &src_mem->alloc_params);
+
+  /* the allocation can fail (context push or device allocation) */
+  if (copy == NULL) {
+    GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, mem->allocator,
+        "Failed to allocate memory for copy");
+    return NULL;
+  }
+
+  dst_mem = GST_CUDA_MEMORY_CAST (copy);
+
+  info = &src_mem->alloc_params.info;
+
+  if (!gst_cuda_context_push (ctx)) {
+    GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
+    /* release through the GstMemory refcount so that the allocator
+     * reference taken by gst_memory_init() is dropped as well */
+    gst_memory_unref (copy);
+
+    return NULL;
+  }
+
+  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
+    CUDA_MEMCPY2D param = { 0, };
+
+    param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+    param.srcDevice = src_mem->data + src_mem->offset[i];
+    param.srcPitch = src_mem->stride;
+
+    param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+    param.dstDevice = dst_mem->data + dst_mem->offset[i];
+    param.dstPitch = dst_mem->stride;
+    param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
+        GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
+    param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
+
+    if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
+      GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY,
+          mem->allocator, "Failed to copy %dth plane", i);
+      gst_cuda_context_pop (NULL);
+      gst_memory_unref (copy);
+
+      return NULL;
+    }
+  }
+
+  gst_cuda_result (CuStreamSynchronize (NULL));
+
+  if (!gst_cuda_context_pop (NULL)) {
+    GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
+  }
+
+  return copy;
+}
+
+/* Creates a new allocator that allocates CUDA memory on @context.
+ *
+ * @context: a #GstCudaContext; the allocator takes its own reference
+ *
+ * Returns: a new, non-floating #GstAllocator, or NULL if @context is not a
+ * GstCudaContext. */
+GstAllocator *
+gst_cuda_allocator_new (GstCudaContext * context)
+{
+  GstCudaAllocator *allocator;
+
+  g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
+
+  allocator = g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL);
+  /* sink the initial floating reference, as gst_cuda_buffer_pool_new() does
+   * for the pool, so the returned reference is a plain owned one */
+  gst_object_ref_sink (allocator);
+
+  allocator->context = gst_object_ref (context);
+
+  return GST_ALLOCATOR_CAST (allocator);
+}
+
+/* Returns TRUE if @mem is non-NULL and was allocated by a GstCudaAllocator. */
+gboolean
+gst_is_cuda_memory (GstMemory * mem)
+{
+  if (mem == NULL || mem->allocator == NULL)
+    return FALSE;
+
+  return GST_IS_CUDA_ALLOCATOR (mem->allocator);
+}
diff --git a/sys/nvcodec/gstcudamemory.h b/sys/nvcodec/gstcudamemory.h

new file mode 100644 (file)

index 0000000..7983766
--- /dev/null
+++ b/sys/nvcodec/gstcudamemory.h
@@ -0,0 +1,138 @@
+/* GStreamer
+ * Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_CUDA_MEMORY_H__
+#define __GST_CUDA_MEMORY_H__
+
+#include <gst/gst.h>
+#include <gst/gstallocator.h>
+#include <gst/video/video.h>
+#include "gstcudaloader.h"
+#include "gstcudacontext.h"
+
+G_BEGIN_DECLS
+
+#define GST_TYPE_CUDA_ALLOCATOR             (gst_cuda_allocator_get_type())
+#define GST_CUDA_ALLOCATOR(obj)             (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_CUDA_ALLOCATOR,GstCudaAllocator))
+#define GST_CUDA_ALLOCATOR_CLASS(klass)     (G_TYPE_CHECK_CLASS_CAST((klass), GST_TYPE_CUDA_ALLOCATOR,GstCudaAllocatorClass))
+#define GST_CUDA_ALLOCATOR_GET_CLASS(obj)   (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_CUDA_ALLOCATOR,GstCudaAllocatorClass))
+#define GST_IS_CUDA_ALLOCATOR(obj)          (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_CUDA_ALLOCATOR))
+#define GST_IS_CUDA_ALLOCATOR_CLASS(klass)  (G_TYPE_CHECK_CLASS_TYPE((klass), GST_TYPE_CUDA_ALLOCATOR))
+#define GST_CUDA_ALLOCATOR_CAST(obj)        ((GstCudaAllocator *)(obj))
+#define GST_CUDA_MEMORY_CAST(mem)           ((GstCudaMemory *) (mem))
+
+typedef struct _GstCudaAllocationParams GstCudaAllocationParams;
+typedef struct _GstCudaAllocator GstCudaAllocator;
+typedef struct _GstCudaAllocatorClass GstCudaAllocatorClass;
+typedef struct _GstCudaMemory GstCudaMemory;
+
+/**
+ * GST_MAP_CUDA:
+ *
+ * Flag indicating that the CUDA device memory should be mapped
+ * instead of the system (staging) memory.
+ *
+ * Combining #GST_MAP_CUDA with #GST_MAP_WRITE has the same semantics as though
+ * you are writing to CUDA device/host memory.
+ * Conversely, combining #GST_MAP_CUDA with
+ * #GST_MAP_READ has the same semantics as though you are reading from
+ * CUDA device/host memory
+ */
+#define GST_MAP_CUDA (GST_MAP_FLAG_LAST << 1)
+
+#define GST_CUDA_MEMORY_TYPE_NAME "gst.cuda.memory"
+
+/**
+ * GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY:
+ *
+ * Name of the caps feature for indicating the use of #GstCudaMemory
+ */
+#define GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY "memory:CUDAMemory"
+
+struct _GstCudaAllocationParams
+{
+  GstAllocationParams parent;   /* must stay first: the struct is accessed through a GstAllocationParams pointer */
+
+  GstVideoInfo info;            /* video layout used to size the allocation and to copy planes */
+};
+
+struct _GstCudaAllocator
+{
+  GstAllocator parent;
+  GstCudaContext *context;      /* reference taken in gst_cuda_allocator_new(), cleared in dispose */
+};
+
+struct _GstCudaAllocatorClass
+{
+  GstAllocatorClass parent_class;
+};
+
+GType          gst_cuda_allocator_get_type (void);
+
+GstAllocator * gst_cuda_allocator_new (GstCudaContext * context);
+
+GstMemory    * gst_cuda_allocator_alloc (GstAllocator * allocator,
+                                         gsize size,
+                                         GstCudaAllocationParams * params);
+
+/**
+ * GstCudaMemoryTransfer:
+ * @GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD: the device memory needs downloading
+ *                                          to the staging memory
+ * @GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD:   the staging memory needs uploading
+ *                                          to the device memory
+ */
+typedef enum
+{
+  GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD   = (GST_MEMORY_FLAG_LAST << 0),        /* set after a CUDA write map, cleared by a host map */
+  GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD     = (GST_MEMORY_FLAG_LAST << 1) /* set after a host write map, cleared by a CUDA map */
+} GstCudaMemoryTransfer;
+
+struct _GstCudaMemory
+{
+  GstMemory       mem;
+
+  GstCudaContext *context;      /* context the device memory was allocated on (own reference) */
+  CUdeviceptr data;             /* device pointer returned by CuMemAllocPitch */
+
+  GstCudaAllocationParams alloc_params; /* copy of the params used for allocation */
+
+  /* per-plane byte offset into, and pitch of, the CUDA device memory */
+  gsize offset[GST_VIDEO_MAX_PLANES];
+  gint stride;
+
+  /* allocated CUDA Host memory (unaligned pointer, the one to free) */
+  gpointer map_alloc_data;
+
+  /* aligned CUDA Host memory (points inside map_alloc_data) */
+  guint8 *align_data;
+
+  /* pointing align_data if the memory is mapped, NULL otherwise */
+  gpointer map_data;
+
+  gint map_count;               /* outstanding maps, protected by lock */
+
+  GMutex lock;                  /* guards the map state and transfer flags during map/unmap */
+};
+
+gboolean        gst_is_cuda_memory        (GstMemory * mem);
+
+G_END_DECLS
+
+#endif /* __GST_CUDA_MEMORY_H__ */
diff --git a/sys/nvcodec/meson.build b/sys/nvcodec/meson.build

index d77cc6d..e2039e6 100644 (file)
--- a/sys/nvcodec/meson.build
+++ b/sys/nvcodec/meson.build
@@ -12,6 +12,8 @@ nvcodec_sources = [
    'gstnvdecoder.c',
    'gstnvh264dec.c',
    'gstnvh265dec.c',
+  'gstcudamemory.c',
+  'gstcudabufferpool.c',
  ]
  
  if get_option('nvcodec').disabled()
diff --git a/sys/nvcodec/stub/cuda.h b/sys/nvcodec/stub/cuda.h

index 985d882..b844cbc 100644 (file)
--- a/sys/nvcodec/stub/cuda.h
+++ b/sys/nvcodec/stub/cuda.h
@@ -114,6 +114,7 @@ typedef enum
  
  #define cuMemAlloc cuMemAlloc_v2
  #define cuMemAllocPitch cuMemAllocPitch_v2
+#define cuMemAllocHost  cuMemAllocHost_v2
  #define cuMemcpy2D cuMemcpy2D_v2
  #define cuMemcpy2DAsync cuMemcpy2DAsync_v2
  #define cuMemFree cuMemFree_v2
author	Seungha Yang <seungha.yang@navercorp.com>
	Mon, 19 Aug 2019 09:02:56 +0000 (18:02 +0900)
committer	GStreamer Merge Bot <gitlab-merge-bot@gstreamer-foundation.org>
	Fri, 16 Oct 2020 15:56:49 +0000 (15:56 +0000)
sys/nvcodec/gstcudabufferpool.c	[new file with mode: 0644]	patch \| blob
sys/nvcodec/gstcudabufferpool.h	[new file with mode: 0644]	patch \| blob
sys/nvcodec/gstcudaloader.c		patch \| blob \| history
sys/nvcodec/gstcudaloader.h		patch \| blob \| history
sys/nvcodec/gstcudamemory.c	[new file with mode: 0644]	patch \| blob
sys/nvcodec/gstcudamemory.h	[new file with mode: 0644]	patch \| blob
sys/nvcodec/meson.build		patch \| blob \| history
sys/nvcodec/stub/cuda.h		patch \| blob \| history