#include "opencv2/core.hpp"
#include "opencv2/core/gpu_types.hpp"
-namespace cv { namespace gpu
-{
+namespace cv { namespace gpu {
//////////////////////////////// GpuMat ///////////////////////////////
//! unmaps the memory of matrix m, and makes it pageable again
CV_EXPORTS void unregisterPageLocked(Mat& m);
-//////////////////////////////// CudaStream ////////////////////////////////
+///////////////////////////////// Stream //////////////////////////////////
+
// Encapsulates a CUDA stream. Provides interface for async copying.
// Passed to each function that supports async kernel execution.
-// Reference counting is enabled
+// Reference counting is enabled.
class CV_EXPORTS Stream
{
+ typedef void (Stream::*bool_type)() const;
+ void this_type_does_not_support_comparisons() const {}
+
public:
+ typedef void (*StreamCallback)(int status, void* userData);
+
+ //! creates a new asynchronous stream
Stream();
- ~Stream();
- Stream(const Stream&);
- Stream& operator =(const Stream&);
+ //! queries an asynchronous stream for completion status
+ bool queryIfComplete() const;
- bool queryIfComplete();
+ //! waits for stream tasks to complete
void waitForCompletion();
- //! downloads asynchronously
- // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat)
- void enqueueDownload(const GpuMat& src, CudaMem& dst);
- void enqueueDownload(const GpuMat& src, Mat& dst);
+ //! adds a callback to be called on the host after all currently enqueued items in the stream have completed
+ void enqueueHostCallback(StreamCallback callback, void* userData);
- //! uploads asynchronously
- // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI)
- void enqueueUpload(const CudaMem& src, GpuMat& dst);
- void enqueueUpload(const Mat& src, GpuMat& dst);
+    //! returns Stream object for the default CUDA stream
+ static Stream& Null();
- //! copy asynchronously
- void enqueueCopy(const GpuMat& src, GpuMat& dst);
+ //! returns true if stream object is not default (!= 0)
+ operator bool_type() const;
- //! memory set asynchronously
- void enqueueMemSet(GpuMat& src, Scalar val);
- void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask);
+ // obsolete methods
- //! converts matrix type, ex from float to uchar depending on type
- void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0);
+ void enqueueDownload(const GpuMat& src, OutputArray dst);
- //! adds a callback to be called on the host after all currently enqueued items in the stream have completed
- typedef void (*StreamCallback)(Stream& stream, int status, void* userData);
- void enqueueHostCallback(StreamCallback callback, void* userData);
+ void enqueueUpload(InputArray src, GpuMat& dst);
- static Stream& Null();
+ void enqueueCopy(const GpuMat& src, OutputArray dst);
- operator bool() const;
+ void enqueueMemSet(GpuMat& src, Scalar val);
+ void enqueueMemSet(GpuMat& src, Scalar val, InputArray mask);
-private:
- struct Impl;
+ void enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha = 1.0, double beta = 0.0);
- explicit Stream(Impl* impl);
- void create();
- void release();
+ class Impl;
- Impl *impl;
+private:
+ Ptr<Impl> impl_;
+ Stream(const Ptr<Impl>& impl);
friend struct StreamAccessor;
};
CV_EXPORTS void printShortCudaDeviceInfo(int device);
-}} // cv::gpu
+}} // namespace cv { namespace gpu {
+
+namespace cv {
+
+template <> CV_EXPORTS void Ptr<cv::gpu::Stream::Impl>::delete_obj();
+
+}
#include "opencv2/core/gpu.inl.hpp"
#include "opencv2/core/gpu.hpp"
-namespace cv { namespace gpu
-{
+namespace cv { namespace gpu {
//////////////////////////////// GpuMat ///////////////////////////////
a.swap(b);
}
-}} // namespace cv { namespace gpu
+//////////////////////////////// Stream ///////////////////////////////
+
+inline
+void Stream::enqueueDownload(const GpuMat& src, OutputArray dst)
+{
+ src.download(dst, *this);
+}
+
+inline
+void Stream::enqueueUpload(InputArray src, GpuMat& dst)
+{
+ dst.upload(src, *this);
+}
+
+inline
+void Stream::enqueueCopy(const GpuMat& src, OutputArray dst)
+{
+ src.copyTo(dst, *this);
+}
+
+inline
+void Stream::enqueueMemSet(GpuMat& src, Scalar val)
+{
+ src.setTo(val, *this);
+}
+
+inline
+void Stream::enqueueMemSet(GpuMat& src, Scalar val, InputArray mask)
+{
+ src.setTo(val, mask, *this);
+}
+
+inline
+void Stream::enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha, double beta)
+{
+ src.convertTo(dst, dtype, alpha, beta, *this);
+}
+
+inline
+Stream::Stream(const Ptr<Impl>& impl)
+ : impl_(impl)
+{
+}
+
+}} // namespace cv { namespace gpu {
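// Illustrative migration note (not part of the patch): the obsolete enqueue*
// methods above are now thin wrappers over the stream-aware GpuMat calls, so
// call sites can be rewritten one-for-one. 'stream', 'h_src', 'h_dst',
// 'd_src' and 'd_dst' are hypothetical variables.
//
//   stream.enqueueUpload(h_src, d_dst);            ->  d_dst.upload(h_src, stream);
//   stream.enqueueDownload(d_src, h_dst);          ->  d_src.download(h_dst, stream);
//   stream.enqueueCopy(d_src, d_dst);              ->  d_src.copyTo(d_dst, stream);
//   stream.enqueueMemSet(d_dst, Scalar::all(0));   ->  d_dst.setTo(Scalar::all(0), stream);
//   stream.enqueueConvert(d_src, d_dst, CV_32F);   ->  d_src.convertTo(d_dst, CV_32F, stream);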
//////////////////////////////// Mat ////////////////////////////////
using namespace cv;
using namespace cv::gpu;
-#if !defined (HAVE_CUDA)
+#ifndef HAVE_CUDA
-cv::gpu::Stream::Stream() { throw_no_cuda(); }
-cv::gpu::Stream::~Stream() {}
-cv::gpu::Stream::Stream(const Stream&) { throw_no_cuda(); }
-Stream& cv::gpu::Stream::operator=(const Stream&) { throw_no_cuda(); return *this; }
-bool cv::gpu::Stream::queryIfComplete() { throw_no_cuda(); return false; }
-void cv::gpu::Stream::waitForCompletion() { throw_no_cuda(); }
-void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_no_cuda(); }
-void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_no_cuda(); }
-void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_no_cuda(); }
-void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_no_cuda(); }
-void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_no_cuda(); }
-void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_no_cuda(); }
-Stream& cv::gpu::Stream::Null() { throw_no_cuda(); static Stream s; return s; }
-cv::gpu::Stream::operator bool() const { throw_no_cuda(); return false; }
-cv::gpu::Stream::Stream(Impl*) { throw_no_cuda(); }
-void cv::gpu::Stream::create() { throw_no_cuda(); }
-void cv::gpu::Stream::release() { throw_no_cuda(); }
-
-#else /* !defined (HAVE_CUDA) */
-
-struct Stream::Impl
+class cv::gpu::Stream::Impl
{
- static cudaStream_t getStream(const Impl* impl)
+public:
+ Impl(void* ptr = 0)
{
- return impl ? impl->stream : 0;
+ (void) ptr;
+ throw_no_cuda();
}
+};
+
+#else
+class cv::gpu::Stream::Impl
+{
+public:
cudaStream_t stream;
- int ref_counter;
+
+ Impl();
+ Impl(cudaStream_t stream);
+
+ ~Impl();
};
-cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
+cv::gpu::Stream::Impl::Impl() : stream(0)
{
- return Stream::Impl::getStream(stream.impl);
+ cudaSafeCall( cudaStreamCreate(&stream) );
}
-cv::gpu::Stream::Stream() : impl(0)
+cv::gpu::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
{
- create();
}
-cv::gpu::Stream::~Stream()
+cv::gpu::Stream::Impl::~Impl()
{
- release();
+ if (stream)
+ cudaStreamDestroy(stream);
}
-cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)
+cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
- if (impl)
- CV_XADD(&impl->ref_counter, 1);
+ return stream.impl_->stream;
}
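// Illustrative interop sketch (not part of the patch): StreamAccessor exposes
// the raw cudaStream_t so module code can queue its own CUDA runtime work on
// the same stream as the OpenCV calls. 'd_dst', 'h_src' and 'bytes' are
// hypothetical.
static void exampleRawStreamInterop(const cv::gpu::Stream& s, void* d_dst, const void* h_src, size_t bytes)
{
    cudaStream_t raw = cv::gpu::StreamAccessor::getStream(s);

    // asynchronous host-to-device copy queued behind the stream's pending work
    cudaSafeCall( cudaMemcpyAsync(d_dst, h_src, bytes, cudaMemcpyHostToDevice, raw) );
}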
-Stream& cv::gpu::Stream::operator =(const Stream& stream)
-{
- if (this != &stream)
- {
- release();
- impl = stream.impl;
- if (impl)
- CV_XADD(&impl->ref_counter, 1);
- }
+#endif
- return *this;
+cv::gpu::Stream::Stream()
+{
+#ifndef HAVE_CUDA
+ throw_no_cuda();
+#else
+ impl_ = new Impl;
+#endif
}
-bool cv::gpu::Stream::queryIfComplete()
+bool cv::gpu::Stream::queryIfComplete() const
{
- cudaStream_t stream = Impl::getStream(impl);
- cudaError_t err = cudaStreamQuery(stream);
+#ifndef HAVE_CUDA
+ throw_no_cuda();
+ return false;
+#else
+ cudaError_t err = cudaStreamQuery(impl_->stream);
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
return false;
+#endif
}
void cv::gpu::Stream::waitForCompletion()
{
- cudaStream_t stream = Impl::getStream(impl);
- cudaSafeCall( cudaStreamSynchronize(stream) );
-}
-
-void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
-{
- // if not -> allocation will be done, but after that dst will not point to page locked memory
- CV_Assert( src.size() == dst.size() && src.type() == dst.type() );
-
- cudaStream_t stream = Impl::getStream(impl);
- size_t bwidth = src.cols * src.elemSize();
- cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
-}
-
-void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
-{
- dst.create(src.size(), src.type());
-
- cudaStream_t stream = Impl::getStream(impl);
- size_t bwidth = src.cols * src.elemSize();
- cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
-}
-
-void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
-{
- dst.create(src.size(), src.type());
-
- cudaStream_t stream = Impl::getStream(impl);
- size_t bwidth = src.cols * src.elemSize();
- cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
-}
-
-void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
-{
- dst.create(src.size(), src.type());
-
- cudaStream_t stream = Impl::getStream(impl);
- size_t bwidth = src.cols * src.elemSize();
- cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
-}
-
-void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
-{
- dst.create(src.size(), src.type());
-
- cudaStream_t stream = Impl::getStream(impl);
- size_t bwidth = src.cols * src.elemSize();
- cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
-}
-
-void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
-{
- src.setTo(val, *this);
-}
-
-void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
-{
- src.setTo(val, mask, *this);
-}
-
-void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double alpha, double beta)
-{
- src.convertTo(dst, dtype, alpha, beta, *this);
+#ifndef HAVE_CUDA
+ throw_no_cuda();
+#else
+ cudaSafeCall( cudaStreamSynchronize(impl_->stream) );
+#endif
}
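// Illustrative sketch (not part of the patch): queryIfComplete() is the
// non-blocking probe, waitForCompletion() the blocking barrier. A caller can
// overlap CPU work with pending GPU work like this (the function name is
// hypothetical):
static int exampleOverlapCpuAndGpu(cv::gpu::Stream& stream)
{
    int cpuIterations = 0;

    // poll without blocking; do cheap CPU-side work while the GPU is busy
    while (!stream.queryIfComplete())
        ++cpuIterations;

    stream.waitForCompletion();   // explicit barrier; returns immediately once the stream is idle
    return cpuIterations;
}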
-#if CUDART_VERSION >= 5000
+#if defined(HAVE_CUDA) && (CUDART_VERSION >= 5000)
namespace
{
struct CallbackData
{
- cv::gpu::Stream::StreamCallback callback;
+ Stream::StreamCallback callback;
void* userData;
- Stream stream;
+
+ CallbackData(Stream::StreamCallback callback_, void* userData_) : callback(callback_), userData(userData_) {}
};
void CUDART_CB cudaStreamCallback(cudaStream_t, cudaError_t status, void* userData)
{
CallbackData* data = reinterpret_cast<CallbackData*>(userData);
- data->callback(data->stream, static_cast<int>(status), data->userData);
+ data->callback(static_cast<int>(status), data->userData);
delete data;
}
}
void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
{
-#if CUDART_VERSION >= 5000
- CallbackData* data = new CallbackData;
- data->callback = callback;
- data->userData = userData;
- data->stream = *this;
-
- cudaStream_t stream = Impl::getStream(impl);
-
- cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
-#else
+#ifndef HAVE_CUDA
(void) callback;
(void) userData;
- CV_Error(CV_StsNotImplemented, "This function requires CUDA 5.0");
+ throw_no_cuda();
+#else
+ #if CUDART_VERSION < 5000
+ (void) callback;
+ (void) userData;
+ CV_Error(cv::Error::StsNotImplemented, "This function requires CUDA 5.0");
+ #else
+ CallbackData* data = new CallbackData(callback, userData);
+
+ cudaSafeCall( cudaStreamAddCallback(impl_->stream, cudaStreamCallback, data, 0) );
+ #endif
#endif
}
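// Illustrative sketch (not part of the patch): a host-side callback matching
// the new StreamCallback signature (int status, void* userData). CUDA runs it
// on a driver thread once all work queued before it has finished; per the CUDA
// documentation such callbacks must not call CUDA API functions themselves.
static void exampleStreamDoneCallback(int status, void* userData)
{
    bool* finishedOk = static_cast<bool*>(userData);
    *finishedOk = (status == 0);   // 0 == cudaSuccess
}
//
// usage (hypothetical):
//   bool finishedOk = false;
//   stream.enqueueHostCallback(exampleStreamDoneCallback, &finishedOk);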
-cv::gpu::Stream& cv::gpu::Stream::Null()
+Stream& cv::gpu::Stream::Null()
{
- static Stream s((Impl*) 0);
+ static Stream s(new Impl(0));
return s;
}
-cv::gpu::Stream::operator bool() const
-{
- return impl && impl->stream;
-}
-
-cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_)
+cv::gpu::Stream::operator bool_type() const
{
+#ifndef HAVE_CUDA
+ return 0;
+#else
+ return (impl_->stream != 0) ? &Stream::this_type_does_not_support_comparisons : 0;
+#endif
}
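// Note (not part of the patch): the bool_type conversion above is the
// "safe bool" idiom. `if (stream)` and `if (!stream)` test for a non-default
// stream, but because the conversion target is a pointer to a private member
// function rather than a real bool, the object does not silently participate
// in arithmetic or integer comparisons the way a plain `operator bool()`
// would (e.g. `stream + 1` or `stream < 2` no longer compile).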
-void cv::gpu::Stream::create()
-{
- if (impl)
- release();
-
- cudaStream_t stream;
- cudaSafeCall( cudaStreamCreate( &stream ) );
-
- impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));
-
- impl->stream = stream;
- impl->ref_counter = 1;
-}
-
-void cv::gpu::Stream::release()
+template <> void cv::Ptr<Stream::Impl>::delete_obj()
{
- if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
- {
- cudaSafeCall( cudaStreamDestroy(impl->stream) );
- cv::fastFree(impl);
- }
+ if (obj) delete obj;
}
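// Note (not part of the patch): Stream::Impl is declared in the header but
// defined only in this translation unit, so the generic Ptr<>::delete_obj()
// instantiated in client code would have to delete an incomplete type. The
// explicit specialization above (declared CV_EXPORTS in the header) keeps the
// delete here, where Impl is fully defined.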
-
-#endif /* !defined (HAVE_CUDA) */
{
if (src3.empty())
{
- if (stream)
- stream.enqueueMemSet(dst, Scalar::all(0));
- else
- dst.setTo(Scalar::all(0));
+ dst.setTo(Scalar::all(0), stream);
}
else
{
}
else
{
- if (stream)
- stream.enqueueCopy(src3, dst);
- else
- src3.copyTo(dst);
+ src3.copyTo(dst, stream);
}
}
}
cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);
sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
- if (s)
- s.enqueueMemSet(sum, Scalar::all(0));
- else
- sum.setTo(Scalar::all(0));
+
+ sum.setTo(Scalar::all(0), s);
GpuMat inner = sum(Rect(1, 1, src.cols, src.rows));
GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));
- if (s)
- s.enqueueCopy(res, inner);
- else
- res.copyTo(inner);
+ res.copyTo(inner, s);
}
else
{
GpuMat result_block(result_roi_size, result_data.type(),
result_data.ptr(), result_data.step);
- if (stream)
- stream.enqueueCopy(result_block, result_roi);
- else
- result_block.copyTo(result_roi);
+ result_block.copyTo(result_roi, stream);
}
}
initialize(frame.size(), 0.0f, frame.depth() == CV_8U ? 255.0f : frame.depth() == CV_16U ? std::numeric_limits<ushort>::max() : 1.0f);
fgmask.create(frameSize_, CV_8UC1);
- if (stream)
- stream.enqueueMemSet(fgmask, cv::Scalar::all(0));
- else
- fgmask.setTo(cv::Scalar::all(0));
+ fgmask.setTo(cv::Scalar::all(0), stream);
funcs[frame.depth()][frame.channels() - 1](frame, fgmask, colors_, weights_, nfeatures_, frameNum_, learningRate, updateBackgroundModel, cv::gpu::StreamAccessor::getStream(stream));
ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
}
- if (stream)
- stream.enqueueMemSet(trainIdx, Scalar::all(-1));
- else
- trainIdx.setTo(Scalar::all(-1));
+ trainIdx.setTo(Scalar::all(-1), stream);
caller_t func = callers[query.depth()];
CV_Assert(func != 0);
ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx);
ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
- if (stream)
- stream.enqueueMemSet(trainIdx, Scalar::all(-1));
- else
- trainIdx.setTo(Scalar::all(-1));
+ trainIdx.setTo(Scalar::all(-1), stream);
caller_t func = callers[query.depth()];
CV_Assert(func != 0);
ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance);
}
- if (stream)
- stream.enqueueMemSet(nMatches, Scalar::all(0));
- else
- nMatches.setTo(Scalar::all(0));
+ nMatches.setTo(Scalar::all(0), stream);
caller_t func = callers[query.depth()];
CV_Assert(func != 0);
ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32FC1, distance);
}
- if (stream)
- stream.enqueueMemSet(nMatches, Scalar::all(0));
- else
- nMatches.setTo(Scalar::all(0));
+ nMatches.setTo(Scalar::all(0), stream);
caller_t func = callers[query.depth()];
CV_Assert(func != 0);
if (roi.size() != src_size)
{
- if (stream)
- stream.enqueueMemSet(dst, Scalar::all(0));
- else
- dst.setTo(Scalar::all(0));
+ dst.setTo(Scalar::all(0), stream);
}
normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);
if (roi.size() != src_size)
{
- if (stream)
- stream.enqueueMemSet(dst, Scalar::all(0));
- else
- dst.setTo(Scalar::all(0));
+ dst.setTo(Scalar::all(0), stream);
}
ensureSizeIsEnough(src_size, bufType, *pbuf);
if (roi.size() != src_size)
{
- if (stream)
- stream.enqueueMemSet(dst, Scalar::all(0));
- else
- dst.setTo(Scalar::all(0));
+ dst.setTo(Scalar::all(0), stream);
}
normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);
if (iterations == 0 || _kernel.rows * _kernel.cols == 1)
{
- if (stream)
- stream.enqueueCopy(src, dst);
- else
- src.copyTo(dst);
+ src.copyTo(dst, stream);
return;
}
return;
}
- if (stream)
- {
- stream.enqueueConvert(image, buf.imagef, CV_32F);
- stream.enqueueConvert(templ, buf.templf, CV_32F);
- }
- else
- {
- image.convertTo(buf.imagef, CV_32F);
- templ.convertTo(buf.templf, CV_32F);
- }
+ image.convertTo(buf.imagef, CV_32F, stream);
+ templ.convertTo(buf.templf, CV_32F, stream);
+
matchTemplate_CCORR_32F(buf.imagef, buf.templf, result, buf, stream);
}
void matchTemplate_CCOFF_NORMED_8U(
const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
{
- if (stream)
- {
- stream.enqueueConvert(image, buf.imagef, CV_32F);
- stream.enqueueConvert(templ, buf.templf, CV_32F);
- }
- else
- {
- image.convertTo(buf.imagef, CV_32F);
- templ.convertTo(buf.templf, CV_32F);
- }
+ image.convertTo(buf.imagef, CV_32F, stream);
+ templ.convertTo(buf.templf, CV_32F, stream);
matchTemplate_CCORR_32F(buf.imagef, buf.templf, result, buf, stream);
break;
}
- streams[0].enqueueConvert(frame0, frames_[0], CV_32F);
- streams[1].enqueueConvert(frame1, frames_[1], CV_32F);
+ frame0.convertTo(frames_[0], CV_32F, streams[0]);
+ frame1.convertTo(frames_[1], CV_32F, streams[1]);
if (fastPyramids)
{
{
gpu::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
gpu::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
- streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), scale);
- streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), scale);
+ curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);
+ curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);
}
else
{
- streams[0].enqueueMemSet(curFlowX, 0);
- streams[1].enqueueMemSet(curFlowY, 0);
+ curFlowX.setTo(0, streams[0]);
+ curFlowY.setTo(0, streams[1]);
}
}
else
{
gpu::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
gpu::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
- streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), 1./pyrScale);
- streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), 1./pyrScale);
+ curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale, streams[0]);
+ curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale, streams[1]);
}
GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
{
cudev::optflow_farneback::gaussianBlurGpu(
frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, S(streams[i]));
- gpu::resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR, streams[i]);
+ gpu::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
cudev::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN, R[i], S(streams[i]));
}
}
if (&dst != &disp)
{
- if (stream)
- stream.enqueueCopy(disp, dst);
- else
- disp.copyTo(dst);
+ disp.copyTo(dst, stream);
}
disp_bilateral_filter<T>(dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
if (rthis.levels & 1)
{
//can clear less area
- if (stream)
- {
- stream.enqueueMemSet(u, zero);
- stream.enqueueMemSet(d, zero);
- stream.enqueueMemSet(l, zero);
- stream.enqueueMemSet(r, zero);
- }
- else
- {
- u.setTo(zero);
- d.setTo(zero);
- l.setTo(zero);
- r.setTo(zero);
- }
+ u.setTo(zero, stream);
+ d.setTo(zero, stream);
+ l.setTo(zero, stream);
+ r.setTo(zero, stream);
}
if (rthis.levels > 1)
if ((rthis.levels & 1) == 0)
{
- if (stream)
- {
- stream.enqueueMemSet(u2, zero);
- stream.enqueueMemSet(d2, zero);
- stream.enqueueMemSet(l2, zero);
- stream.enqueueMemSet(r2, zero);
- }
- else
- {
- u2.setTo(zero);
- d2.setTo(zero);
- l2.setTo(zero);
- r2.setTo(zero);
- }
+ u2.setTo(zero, stream);
+ d2.setTo(zero, stream);
+ l2.setTo(zero, stream);
+ r2.setTo(zero, stream);
}
}
out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
- if (stream)
- stream.enqueueMemSet(out, zero);
- else
- out.setTo(zero);
+ out.setTo(zero, stream);
output_callers[funcIdx](u, d, l, r, datas.front(), out, cudaStream);
if (disp.type() != CV_16S)
- {
- if (stream)
- stream.enqueueConvert(out, disp, disp.type());
- else
- out.convertTo(disp, disp.type());
- }
+ out.convertTo(disp, disp.type(), stream);
}
StereoBeliefPropagation& rthis;
load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight, rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
- if (stream)
- {
- stream.enqueueMemSet(l[0], zero);
- stream.enqueueMemSet(d[0], zero);
- stream.enqueueMemSet(r[0], zero);
- stream.enqueueMemSet(u[0], zero);
-
- stream.enqueueMemSet(l[1], zero);
- stream.enqueueMemSet(d[1], zero);
- stream.enqueueMemSet(r[1], zero);
- stream.enqueueMemSet(u[1], zero);
-
- stream.enqueueMemSet(data_cost, zero);
- stream.enqueueMemSet(data_cost_selected, zero);
- }
- else
- {
- l[0].setTo(zero);
- d[0].setTo(zero);
- r[0].setTo(zero);
- u[0].setTo(zero);
-
- l[1].setTo(zero);
- d[1].setTo(zero);
- r[1].setTo(zero);
- u[1].setTo(zero);
-
- data_cost.setTo(zero);
- data_cost_selected.setTo(zero);
- }
+ l[0].setTo(zero, stream);
+ d[0].setTo(zero, stream);
+ r[0].setTo(zero, stream);
+ u[0].setTo(zero, stream);
+
+ l[1].setTo(zero, stream);
+ d[1].setTo(zero, stream);
+ r[1].setTo(zero, stream);
+ u[1].setTo(zero, stream);
+
+ data_cost.setTo(zero, stream);
+ data_cost_selected.setTo(zero, stream);
int cur_idx = 0;
out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
- if (stream)
- stream.enqueueMemSet(out, zero);
- else
- out.setTo(zero);
+ out.setTo(zero, stream);
compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), elem_step, out, nr_plane_pyr[0], cudaStream);
if (disp.type() != CV_16S)
{
- if (stream)
- stream.enqueueConvert(out, disp, disp.type());
- else
- out.convertTo(disp, disp.type());
+ out.convertTo(disp, disp.type(), stream);
}
}
if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
{
- if (stream)
- stream.enqueueCopy(layer0_, outImg);
- else
- layer0_.copyTo(outImg);
+ layer0_.copyTo(outImg, stream);
}
float lastScale = 1.0f;
if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
{
- if (stream)
- stream.enqueueCopy(curLayer, outImg);
- else
- curLayer.copyTo(outImg);
+ curLayer.copyTo(outImg, stream);
}
if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
if (dsize == src.size())
{
- if (s)
- s.enqueueCopy(src, dst);
- else
- src.copyTo(dst);
+ src.copyTo(dst, s);
return;
}
void detect(cv::gpu::GpuMat& objects, cv::gpu::Stream& s) const
{
- if (s)
- s.enqueueMemSet(objects, 0);
- else
- cudaMemset(objects.data, 0, sizeof(Detection));
+ objects.setTo(Scalar::all(0), s);
cudaSafeCall( cudaGetLastError());
cv::gpu::GpuMat ndetections = cv::gpu::GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
- if (s)
- {
- s.enqueueMemSet(overlaps, 0);
- s.enqueueMemSet(suppressed, 0);
- }
- else
- {
- overlaps.setTo(0);
- suppressed.setTo(0);
- }
+ overlaps.setTo(0, s);
+ suppressed.setTo(0, s);
cudaStream_t stream = cv::gpu::StreamAccessor::getStream(s);
cudev::suppress(objects, overlaps, ndetections, suppressed, stream);
cv::softcascade::cudev::shfl_integral(src, buffer, stream);
sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
- if (s)
- s.enqueueMemSet(sum, cv::Scalar::all(0));
- else
- sum.setTo(cv::Scalar::all(0));
+ sum.setTo(cv::Scalar::all(0), s);
cv::gpu::GpuMat inner = sum(cv::Rect(1, 1, src.cols, src.rows));
cv::gpu::GpuMat res = buffer(cv::Rect(0, 0, src.cols, src.rows));
- if (s)
- s.enqueueCopy(res, inner);
- else
- res.copyTo(inner);
+ res.copyTo(inner, s);
}
else {CV_Error(cv::Error::GpuNotSupported, ": CC 3.x required.");}
}
}
else
{
- if (s)
- s.enqueueCopy(image, flds.hogluv);
- else
- image.copyTo(flds.hogluv);
+ image.copyTo(flds.hogluv, s);
}
flds.detect(objects, s);
inline void setZero(cv::gpu::GpuMat& m, cv::gpu::Stream& s)
{
- if (s)
- s.enqueueMemSet(m, 0);
- else
- m.setTo(0);
+ m.setTo(0, s);
}
struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor
gpu::Stream stream;
- stream.enqueueConvert(b.gI1, b.t1, CV_32F);
- stream.enqueueConvert(b.gI2, b.t2, CV_32F);
+ b.gI1.convertTo(b.t1, CV_32F, stream);
+ b.gI2.convertTo(b.t2, CV_32F, stream);
gpu::split(b.t1, b.vI1, stream);
gpu::split(b.t2, b.vI2, stream);
for( int i = 0; i < b.gI1.channels(); ++i )
{
- gpu::multiply(b.vI2[i], b.vI2[i], b.I2_2, stream); // I2^2
- gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, stream); // I1^2
- gpu::multiply(b.vI1[i], b.vI2[i], b.I1_I2, stream); // I1 * I2
+ gpu::multiply(b.vI2[i], b.vI2[i], b.I2_2, 1, -1, stream); // I2^2
+ gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, 1, -1, stream); // I1^2
+ gpu::multiply(b.vI1[i], b.vI2[i], b.I1_I2, 1, -1, stream); // I1 * I2
gpu::GaussianBlur(b.vI1[i], b.mu1, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);
gpu::GaussianBlur(b.vI2[i], b.mu2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);
- gpu::multiply(b.mu1, b.mu1, b.mu1_2, stream);
- gpu::multiply(b.mu2, b.mu2, b.mu2_2, stream);
- gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, stream);
+ gpu::multiply(b.mu1, b.mu1, b.mu1_2, 1, -1, stream);
+ gpu::multiply(b.mu2, b.mu2, b.mu2_2, 1, -1, stream);
+ gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, 1, -1, stream);
gpu::GaussianBlur(b.I1_2, b.sigma1_2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);
gpu::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, gpu::GpuMat(), -1, stream);