CUresult (*CuMemcpy2D) (const CUDA_MEMCPY2D * pCopy);
CUresult (*CuMemcpy2DAsync) (const CUDA_MEMCPY2D * pCopy, CUstream hStream);
CUresult (*CuMemFree) (CUdeviceptr dptr);
+ CUresult (*CuStreamCreate) (CUstream * phStream, unsigned int Flags);
+ CUresult (*CuStreamDestroy) (CUstream hStream);
CUresult (*CuStreamSynchronize) (CUstream hStream);
CUresult (*CuDeviceGet) (CUdevice * device, int ordinal);
LOAD_SYMBOL (cuMemcpy2DAsync, CuMemcpy2DAsync);
LOAD_SYMBOL (cuMemFree, CuMemFree);
+ LOAD_SYMBOL (cuStreamCreate, CuStreamCreate);
+ LOAD_SYMBOL (cuStreamDestroy, CuStreamDestroy);
LOAD_SYMBOL (cuStreamSynchronize, CuStreamSynchronize);
LOAD_SYMBOL (cuDeviceGet, CuDeviceGet);
}
CUresult
+CuStreamCreate (CUstream * phStream, unsigned int Flags) /* wrapper: invokes the CuStreamCreate function pointer held in gst_cuda_vtable */
+{
+ g_assert (gst_cuda_vtable.CuStreamCreate != NULL); /* the vtable slot must be non-NULL before it is called (presumably filled by LOAD_SYMBOL (cuStreamCreate, ...) -- confirm) */
+
+ return gst_cuda_vtable.CuStreamCreate (phStream, Flags); /* both arguments and the CUresult are passed through unchanged */
+}
+
+CUresult
+CuStreamDestroy (CUstream hStream) /* wrapper: invokes the CuStreamDestroy function pointer held in gst_cuda_vtable */
+{
+ g_assert (gst_cuda_vtable.CuStreamDestroy != NULL); /* the vtable slot must be non-NULL before it is called (presumably filled by LOAD_SYMBOL (cuStreamDestroy, ...) -- confirm) */
+
+ return gst_cuda_vtable.CuStreamDestroy (hStream); /* the stream handle and the CUresult are passed through unchanged */
+}
+
+CUresult
CuStreamSynchronize (CUstream hStream)
{
g_assert (gst_cuda_vtable.CuStreamSynchronize != NULL);
{
GstNvDec *nvdec = GST_NVDEC (decoder);
GstNvDecClass *klass = GST_NVDEC_GET_CLASS (nvdec);
+ CUresult cuda_ret;
GST_DEBUG_OBJECT (nvdec, "creating CUDA context");
GST_ERROR_OBJECT (nvdec, "failed to create CUDA context");
return FALSE;
}
+
+ if (gst_cuda_context_push (nvdec->cuda_ctx)) {
+ cuda_ret = CuStreamCreate (&nvdec->cuda_stream, CU_STREAM_NON_BLOCKING);
+ if (!gst_cuda_result (cuda_ret)) {
+ GST_WARNING_OBJECT (nvdec,
+ "Could not create cuda stream, will use default stream");
+ nvdec->cuda_stream = NULL;
+ }
+ gst_cuda_context_pop (NULL);
+ }
#if HAVE_NVCODEC_GST_GL
gst_gl_ensure_element_data (GST_ELEMENT (nvdec),
&nvdec->gl_display, &nvdec->other_gl_context);
{
GstNvDec *nvdec = GST_NVDEC (decoder);
+ if (nvdec->cuda_ctx && nvdec->cuda_stream) {
+ if (gst_cuda_context_push (nvdec->cuda_ctx)) {
+ gst_cuda_result (CuStreamDestroy (nvdec->cuda_stream));
+ gst_cuda_context_pop (NULL);
+ }
+ }
+
gst_clear_object (&nvdec->cuda_ctx);
+ nvdec->cuda_stream = NULL;
return TRUE;
}
}
if (!gst_cuda_result (CuGraphicsMapResources (num_resources, resources,
- NULL))) {
+ nvdec->cuda_stream))) {
GST_WARNING_OBJECT (nvdec, "failed to map CUDA resources");
data->ret = FALSE;
goto unmap_video_frame;
mcpy2d.dstArray = array;
mcpy2d.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
- if (!gst_cuda_result (CuMemcpy2DAsync (&mcpy2d, 0))) {
+ if (!gst_cuda_result (CuMemcpy2DAsync (&mcpy2d, nvdec->cuda_stream))) {
GST_WARNING_OBJECT (nvdec, "memcpy to mapped array failed");
data->ret = FALSE;
}
}
- gst_cuda_result (CuStreamSynchronize (0));
-
if (!gst_cuda_result (CuGraphicsUnmapResources (num_resources, resources,
- NULL)))
+ nvdec->cuda_stream)))
GST_WARNING_OBJECT (nvdec, "failed to unmap CUDA resources");
+ gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream));
+
unmap_video_frame:
if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr)))
GST_WARNING_OBJECT (nvdec, "failed to unmap CUDA video frame");
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (&video_frame, i);
- if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, 0))) {
+ if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, nvdec->cuda_stream))) {
GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i);
CuvidUnmapVideoFrame (nvdec->decoder, dptr);
gst_video_frame_unmap (&video_frame);
}
}
- gst_cuda_result (CuStreamSynchronize (0));
+ gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream));
gst_video_frame_unmap (&video_frame);