2 * Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
24 #include "gstcudamemory.h"
25 #include "gstcudautils.h"
/* Debug category used by the GST_*_OBJECT logging calls in this file; it is
 * selected through GST_CAT_DEFAULT and initialized in
 * gst_cuda_allocator_class_init (). */
29 GST_DEBUG_CATEGORY_STATIC (cuda_allocator_debug);
30 #define GST_CAT_DEFAULT cuda_allocator_debug
/* Allocator instance created and registered once by
 * gst_cuda_memory_init_once (). */
32 static GstAllocator *_gst_cuda_allocator = NULL;
/* Private per-memory state of a GstCudaMemory, reached through mem->priv.
 * NOTE(review): the member list is not visible in this view; code in this
 * file accesses the members data, staging, pitch, width_in_bytes, height
 * and lock. */
34 struct _GstCudaMemoryPrivate
39 /* params used for cuMemAllocPitch */
/* Define the GstCudaAllocator type as a subclass of GstAllocator. The
 * #define makes the parent-class pointer generated by G_DEFINE_TYPE
 * available under the name parent_class. */
47 #define gst_cuda_allocator_parent_class parent_class
48 G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR);
/* Forward declarations of the free/map/unmap/copy implementations that are
 * installed in gst_cuda_allocator_class_init () and
 * gst_cuda_allocator_init (). */
50 static void gst_cuda_allocator_free (GstAllocator * allocator,
53 static gpointer cuda_mem_map (GstMemory * mem, gsize maxsize,
55 static void cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info);
56 static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size);
/* Placeholder installed as the GstAllocatorClass alloc vfunc. It is not
 * meant to be called: reaching it is reported through
 * g_return_val_if_reached () and NULL is returned. CUDA memory is created
 * with gst_cuda_allocator_alloc () instead. */
59 gst_cuda_allocator_dummy_alloc (GstAllocator * allocator, gsize size,
60 GstAllocationParams * params)
62 g_return_val_if_reached (NULL);
/* Class initializer: installs the alloc and free vfuncs on the
 * GstAllocatorClass and initializes the "cudaallocator" debug category. */
66 gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass)
68 GstAllocatorClass *allocator_class = GST_ALLOCATOR_CLASS (klass);
/* alloc is a placeholder that always fails; see
 * gst_cuda_allocator_dummy_alloc (). */
70 allocator_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc);
71 allocator_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free);
73 GST_DEBUG_CATEGORY_INIT (cuda_allocator_debug, "cudaallocator", 0,
/* Instance initializer: sets the memory type name and installs the map,
 * unmap and copy implementations on the base GstAllocator. */
78 gst_cuda_allocator_init (GstCudaAllocator * allocator)
80 GstAllocator *alloc = GST_ALLOCATOR_CAST (allocator);
82 GST_DEBUG_OBJECT (allocator, "init");
84 alloc->mem_type = GST_CUDA_MEMORY_TYPE_NAME;
86 alloc->mem_map = cuda_mem_map;
87 alloc->mem_unmap_full = cuda_mem_unmap_full;
88 alloc->mem_copy = cuda_mem_copy;
/* Flag the allocator as using its own allocation entry point; the generic
 * alloc vfunc is only the gst_cuda_allocator_dummy_alloc () placeholder. */
90 GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC);
/* Allocate pitched CUDA device memory and wrap it in a new GstCudaMemory.
 *
 * self:           allocator the new memory is initialized with
 * context:        CUDA context pushed around the allocation; the new memory
 *                 takes its own reference on it
 * info:           video description; its format selects the plane layout
 *                 and its height is used to compute the plane offsets
 * width_in_bytes: row width handed to CuMemAllocPitch and stored in priv
 * alloc_height:   stored as priv->height; the reported memory size is
 *                 pitch * alloc_height
 *
 * Returns the new memory cast to GstMemory.
 * NOTE(review): several lines of this function (local declarations, the
 * failure returns, the case terminators and the remaining CuMemAllocPitch
 * arguments) are not visible in this view. */
94 gst_cuda_allocator_alloc_internal (GstCudaAllocator * self,
95 GstCudaContext * context, const GstVideoInfo * info,
96 guint width_in_bytes, guint alloc_height)
98 GstCudaMemoryPrivate *priv;
101 gboolean ret = FALSE;
103 guint height = GST_VIDEO_INFO_HEIGHT (info);
104 GstVideoInfo *alloc_info;
/* The allocation runs between a push and a pop of the given context. */
106 if (!gst_cuda_context_push (context))
109 ret = gst_cuda_result (CuMemAllocPitch (&data, &pitch, width_in_bytes,
111 gst_cuda_context_pop (NULL);
114 GST_ERROR_OBJECT (self, "Failed to allocate CUDA memory");
118 mem = g_new0 (GstCudaMemory, 1);
119 mem->priv = priv = g_new0 (GstCudaMemoryPrivate, 1);
/* Remember the allocation geometry; the upload, download and copy paths
 * reuse it for their 2D memcpy parameters. */
123 priv->width_in_bytes = width_in_bytes;
124 priv->height = alloc_height;
125 g_mutex_init (&priv->lock);
/* The memory holds a context reference until gst_cuda_allocator_free (). */
127 mem->context = gst_object_ref (context);
/* One contiguous allocation covers all planes: pitch bytes per row. */
129 mem->info.size = pitch * alloc_height;
131 alloc_info = &mem->info;
132 gst_memory_init (GST_MEMORY_CAST (mem), 0, GST_ALLOCATOR_CAST (self),
133 NULL, alloc_info->size, 0, 0, alloc_info->size);
/* Fill in the stride and offset of each plane. Every plane uses the device
 * pitch as its stride, and planes are stacked one after another. */
135 switch (GST_VIDEO_INFO_FORMAT (info)) {
136 case GST_VIDEO_FORMAT_I420:
137 case GST_VIDEO_FORMAT_YV12:
138 case GST_VIDEO_FORMAT_I420_10LE:
139 /* we are wasting space yes, but required so that this memory
140 * can be used in kernel function */
141 alloc_info->stride[0] = pitch;
142 alloc_info->stride[1] = pitch;
143 alloc_info->stride[2] = pitch;
144 alloc_info->offset[0] = 0;
145 alloc_info->offset[1] = alloc_info->stride[0] * height;
/* NOTE(review): for an odd height, stride * height / 2 is not a whole
 * number of rows, so plane 2 would start in the middle of a row - confirm
 * how odd heights are expected to be handled. */
146 alloc_info->offset[2] = alloc_info->offset[1] +
147 alloc_info->stride[1] * height / 2;
/* Two-plane formats: plane 1 starts right after `height` rows of plane 0. */
149 case GST_VIDEO_FORMAT_NV12:
150 case GST_VIDEO_FORMAT_NV21:
151 case GST_VIDEO_FORMAT_P010_10LE:
152 case GST_VIDEO_FORMAT_P016_LE:
153 alloc_info->stride[0] = pitch;
154 alloc_info->stride[1] = pitch;
155 alloc_info->offset[0] = 0;
156 alloc_info->offset[1] = alloc_info->stride[0] * height;
/* Three planes of `height` rows each, at row offsets 0, height, 2 * height. */
158 case GST_VIDEO_FORMAT_Y444:
159 case GST_VIDEO_FORMAT_Y444_16LE:
160 alloc_info->stride[0] = pitch;
161 alloc_info->stride[1] = pitch;
162 alloc_info->stride[2] = pitch;
163 alloc_info->offset[0] = 0;
164 alloc_info->offset[1] = alloc_info->stride[0] * height;
165 alloc_info->offset[2] = alloc_info->offset[1] * 2;
/* Single-plane formats: only plane 0 is described. */
167 case GST_VIDEO_FORMAT_BGRA:
168 case GST_VIDEO_FORMAT_RGBA:
169 case GST_VIDEO_FORMAT_RGBx:
170 case GST_VIDEO_FORMAT_BGRx:
171 case GST_VIDEO_FORMAT_ARGB:
172 case GST_VIDEO_FORMAT_ABGR:
173 case GST_VIDEO_FORMAT_RGB:
174 case GST_VIDEO_FORMAT_BGR:
175 case GST_VIDEO_FORMAT_BGR10A2_LE:
176 case GST_VIDEO_FORMAT_RGB10A2_LE:
177 alloc_info->stride[0] = pitch;
178 alloc_info->offset[0] = 0;
/* Any other format is a programming error: log it, assert, and drop the
 * memory that was just created. */
181 GST_ERROR_OBJECT (self, "Unexpected format %s",
182 gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (info)));
183 g_assert_not_reached ();
184 gst_memory_unref (GST_MEMORY_CAST (mem));
188 return GST_MEMORY_CAST (mem);
/* GstAllocatorClass free implementation: releases the CUDA buffers, the
 * context reference and the mutex held by the given memory.
 * NOTE(review): the result of gst_cuda_context_push () is not checked here,
 * so the frees and the pop run even when the push failed - confirm this is
 * intended as best-effort cleanup. */
192 gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
194 GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory);
195 GstCudaMemoryPrivate *priv = mem->priv;
/* CUDA frees are issued between a push and a pop of the memory's context. */
197 gst_cuda_context_push (mem->context);
/* Device buffer obtained in gst_cuda_allocator_alloc_internal (). */
199 gst_cuda_result (CuMemFree (priv->data));
/* Host staging buffer obtained in gst_cuda_memory_download (). */
202 gst_cuda_result (CuMemFreeHost (priv->staging));
203 gst_cuda_context_pop (NULL);
/* Drop the reference taken when the memory was created. */
205 gst_object_unref (mem->context);
207 g_mutex_clear (&priv->lock);
213 gst_cuda_memory_upload (GstCudaAllocator * self, GstCudaMemory * mem)
215 GstCudaMemoryPrivate *priv = mem->priv;
217 CUDA_MEMCPY2D param = { 0, };
219 if (!priv->staging ||
220 !GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
224 if (!gst_cuda_context_push (mem->context)) {
225 GST_ERROR_OBJECT (self, "Failed to push cuda context");
229 param.srcMemoryType = CU_MEMORYTYPE_HOST;
230 param.srcHost = priv->staging;
231 param.srcPitch = priv->pitch;
233 param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
234 param.dstDevice = (CUdeviceptr) priv->data;
235 param.dstPitch = priv->pitch;
236 param.WidthInBytes = priv->width_in_bytes;
237 param.Height = priv->height;
239 ret = gst_cuda_result (CuMemcpy2D (¶m));
240 gst_cuda_context_pop (NULL);
243 GST_ERROR_OBJECT (self, "Failed to upload memory");
249 gst_cuda_memory_download (GstCudaAllocator * self, GstCudaMemory * mem)
251 GstCudaMemoryPrivate *priv = mem->priv;
253 CUDA_MEMCPY2D param = { 0, };
255 if (!GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD))
258 if (!gst_cuda_context_push (mem->context)) {
259 GST_ERROR_OBJECT (self, "Failed to push cuda context");
263 if (!priv->staging) {
264 ret = gst_cuda_result (CuMemAllocHost (&priv->staging,
265 GST_MEMORY_CAST (mem)->size));
267 GST_ERROR_OBJECT (self, "Failed to allocate staging memory");
268 gst_cuda_context_pop (NULL);
273 param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
274 param.srcDevice = (CUdeviceptr) priv->data;
275 param.srcPitch = priv->pitch;
277 param.dstMemoryType = CU_MEMORYTYPE_HOST;
278 param.dstHost = priv->staging;
279 param.dstPitch = priv->pitch;
280 param.WidthInBytes = priv->width_in_bytes;
281 param.Height = priv->height;
283 ret = gst_cuda_result (CuMemcpy2D (¶m));
284 gst_cuda_context_pop (NULL);
287 GST_ERROR_OBJECT (self, "Failed to upload memory");
/* mem_map implementation installed in gst_cuda_allocator_init ().
 *
 * When GST_MAP_CUDA is part of @flags the device pointer (priv->data) is
 * handed out after gst_cuda_memory_upload () has run. Otherwise the host
 * path runs gst_cuda_memory_download (). The NEED_UPLOAD and NEED_DOWNLOAD
 * transfer flags record which side has to be refreshed before its next
 * access. priv->lock is taken at entry and released before returning.
 * NOTE(review): the failure branches and the value returned on the host
 * path are not visible in this view. */
293 cuda_mem_map (GstMemory * mem, gsize maxsize, GstMapFlags flags)
295 GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator);
296 GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
297 GstCudaMemoryPrivate *priv = cmem->priv;
300 g_mutex_lock (&priv->lock);
/* Device access: upload any pending host data first, then clear the
 * pending-upload flag. */
301 if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
302 if (!gst_cuda_memory_upload (self, cmem))
305 GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
/* A device-side write means host data must be re-downloaded later. */
307 if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
308 GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
310 ret = (gpointer) priv->data;
314 /* First CPU access, must be downloaded */
316 GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
318 if (!gst_cuda_memory_download (self, cmem))
/* A host-side write means the device copy must be refreshed later. */
323 if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
324 GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
/* The download has been performed, so the pending-download flag is cleared. */
326 GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
329 g_mutex_unlock (&priv->lock);
/* mem_unmap_full implementation installed in gst_cuda_allocator_init ().
 *
 * No data is transferred here; only the transfer flags are updated from the
 * flags the mapping was made with (info->flags), under priv->lock.
 * NOTE(review): the control flow between the two branches is not visible in
 * this view. */
335 cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info)
337 GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
338 GstCudaMemoryPrivate *priv = cmem->priv;
340 g_mutex_lock (&priv->lock);
/* Device mapping that allowed writes: mark a download as pending. */
341 if ((info->flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
342 if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
343 GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
/* Mapping that allowed writes: mark an upload as pending. */
348 if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
349 GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
352 g_mutex_unlock (&priv->lock);
358 cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
360 GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator);
361 GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem);
362 GstCudaContext *context = src_mem->context;
363 GstMapInfo src_info, dst_info;
364 CUDA_MEMCPY2D param = { 0, };
368 /* offset and size are ignored */
369 copy = gst_cuda_allocator_alloc_internal (self, context,
370 &src_mem->info, src_mem->priv->width_in_bytes, src_mem->priv->height);
373 GST_ERROR_OBJECT (self, "Failed to allocate memory for copying");
377 if (!gst_memory_map (mem, &src_info, GST_MAP_READ | GST_MAP_CUDA)) {
378 GST_ERROR_OBJECT (self, "Failed to map src memory");
379 gst_memory_unref (copy);
383 if (!gst_memory_map (copy, &dst_info, GST_MAP_WRITE | GST_MAP_CUDA)) {
384 GST_ERROR_OBJECT (self, "Failed to map dst memory");
385 gst_memory_unmap (mem, &src_info);
386 gst_memory_unref (copy);
390 if (!gst_cuda_context_push (context)) {
391 GST_ERROR_OBJECT (self, "Failed to push cuda context");
392 gst_memory_unmap (mem, &src_info);
393 gst_memory_unmap (copy, &dst_info);
398 param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
399 param.srcDevice = (CUdeviceptr) src_info.data;
400 param.srcPitch = src_mem->priv->pitch;
402 param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
403 param.dstDevice = (CUdeviceptr) dst_info.data;
404 param.dstPitch = src_mem->priv->pitch;
405 param.WidthInBytes = src_mem->priv->width_in_bytes;
406 param.Height = src_mem->priv->height;
408 ret = gst_cuda_result (CuMemcpy2D (¶m));
409 gst_cuda_context_pop (NULL);
411 gst_memory_unmap (mem, &src_info);
412 gst_memory_unmap (copy, &dst_info);
415 GST_ERROR_OBJECT (self, "Failed to copy memory");
416 gst_memory_unref (copy);
424 gst_cuda_memory_init_once (void)
426 static gsize _init = 0;
428 if (g_once_init_enter (&_init)) {
429 _gst_cuda_allocator =
430 (GstAllocator *) g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL);
431 gst_object_ref_sink (_gst_cuda_allocator);
433 gst_allocator_register (GST_CUDA_MEMORY_TYPE_NAME, _gst_cuda_allocator);
434 g_once_init_leave (&_init, 1);
439 gst_is_cuda_memory (GstMemory * mem)
441 return mem != NULL && mem->allocator != NULL &&
442 GST_IS_CUDA_ALLOCATOR (mem->allocator);
/* Public entry point: allocate a CUDA memory for the video frame described
 * by @info.
 *
 * allocator: a GstCudaAllocator
 * context:   CUDA context to allocate in
 * info:      video description; info->stride[0] is passed on as the row
 *            width in bytes and the frame height seeds the allocation height
 *
 * Returns the result of gst_cuda_allocator_alloc_internal (), or NULL when
 * one of the argument checks fails. */
446 gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
447 GstCudaContext * context, const GstVideoInfo * info)
451 g_return_val_if_fail (GST_IS_CUDA_ALLOCATOR (allocator), NULL);
452 g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
453 g_return_val_if_fail (info != NULL, NULL);
455 alloc_height = GST_VIDEO_INFO_HEIGHT (info);
457 /* make sure valid height for subsampled formats */
/* NOTE(review): GST_VIDEO_FORMAT_NV21 is grouped with I420/NV12 in the
 * second switch below but is absent from this one - confirm whether its
 * height should be rounded up as well. */
458 switch (GST_VIDEO_INFO_FORMAT (info)) {
459 case GST_VIDEO_FORMAT_I420:
460 case GST_VIDEO_FORMAT_YV12:
461 case GST_VIDEO_FORMAT_NV12:
462 case GST_VIDEO_FORMAT_P010_10LE:
463 case GST_VIDEO_FORMAT_P016_LE:
464 case GST_VIDEO_FORMAT_I420_10LE:
465 alloc_height = GST_ROUND_UP_2 (alloc_height);
/* Second pass groups the formats by plane layout.
 * NOTE(review): the statements executed for each group are not visible in
 * this view; presumably they scale alloc_height so that all planes fit in
 * one allocation - confirm against the full source. */
471 switch (GST_VIDEO_INFO_FORMAT (info)) {
472 case GST_VIDEO_FORMAT_I420:
473 case GST_VIDEO_FORMAT_YV12:
474 case GST_VIDEO_FORMAT_I420_10LE:
475 case GST_VIDEO_FORMAT_NV12:
476 case GST_VIDEO_FORMAT_NV21:
477 case GST_VIDEO_FORMAT_P010_10LE:
478 case GST_VIDEO_FORMAT_P016_LE:
481 case GST_VIDEO_FORMAT_Y444:
482 case GST_VIDEO_FORMAT_Y444_16LE:
/* The first plane's stride is used as the row width of the allocation. */
489 return gst_cuda_allocator_alloc_internal (allocator, context,
490 info, info->stride[0], alloc_height);