@param nLevels Number of computed levels. nLevels must be at least 2.
@param lowerLevel Lower boundary value of the lowest level.
@param upperLevel Upper boundary value of the greatest level.
+@param stream Stream for the asynchronous version.
*/
-CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel);
+CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
/** @brief Calculates a histogram with evenly distributed bins.
/** @brief Finds edges in an image using the @cite Canny86 algorithm.
@param image Single-channel 8-bit input image.
- @param edges Output edge map. It has the same size and type as image .
+ @param edges Output edge map. It has the same size and type as image.
+ @param stream Stream for the asynchronous version.
*/
- virtual void detect(InputArray image, OutputArray edges) = 0;
+ virtual void detect(InputArray image, OutputArray edges, Stream& stream = Stream::Null()) = 0;
/** @overload
@param dx First derivative of image in the vertical direction. Support only CV_32S type.
@param dy First derivative of image in the horizontal direction. Support only CV_32S type.
- @param edges Output edge map. It has the same size and type as image .
+ @param edges Output edge map. It has the same size and type as image.
+ @param stream Stream for the asynchronous version.
*/
- virtual void detect(InputArray dx, InputArray dy, OutputArray edges) = 0;
+ virtual void detect(InputArray dx, InputArray dy, OutputArray edges, Stream& stream = Stream::Null()) = 0;
virtual void setLowThreshold(double low_thresh) = 0;
virtual double getLowThreshold() const = 0;
\f$(\rho, \theta)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
the image). \f$\theta\f$ is the line rotation angle in radians (
\f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ).
+ @param stream Stream for the asynchronous version.
@sa HoughLines
*/
- virtual void detect(InputArray src, OutputArray lines) = 0;
+ virtual void detect(InputArray src, OutputArray lines, Stream& stream = Stream::Null()) = 0;
/** @brief Downloads results from cuda::HoughLinesDetector::detect to host memory.
@param d_lines Result of cuda::HoughLinesDetector::detect .
@param h_lines Output host array.
@param h_votes Optional output array for line's votes.
+ @param stream Stream for the asynchronous version.
*/
- virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) = 0;
+ virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray(), Stream& stream = Stream::Null()) = 0;
virtual void setRho(float rho) = 0;
virtual float getRho() const = 0;
@param lines Output vector of lines. Each line is represented by a 4-element vector
\f$(x_1, y_1, x_2, y_2)\f$ , where \f$(x_1,y_1)\f$ and \f$(x_2, y_2)\f$ are the ending points of each detected
line segment.
+ @param stream Stream for the asynchronous version.
@sa HoughLinesP
*/
- virtual void detect(InputArray src, OutputArray lines) = 0;
+ virtual void detect(InputArray src, OutputArray lines, Stream& stream = Stream::Null()) = 0;
virtual void setRho(float rho) = 0;
virtual float getRho() const = 0;
@param src 8-bit, single-channel grayscale input image.
@param circles Output vector of found circles. Each vector is encoded as a 3-element
floating-point vector \f$(x, y, radius)\f$ .
+ @param stream Stream for the asynchronous version.
@sa HoughCircles
*/
- virtual void detect(InputArray src, OutputArray circles) = 0;
+ virtual void detect(InputArray src, OutputArray circles, Stream& stream = Stream::Null()) = 0;
virtual void setDp(float dp) = 0;
virtual float getDp() const = 0;
positions).
@param mask Optional region of interest. If the image is not empty (it needs to have the type
CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected.
+ @param stream Stream for the asynchronous version.
*/
- virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray()) = 0;
+ virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray(), Stream& stream = Stream::Null()) = 0;
};
/** @brief Creates implementation for cuda::CornersDetector .
@param sp Spatial window radius.
@param sr Color window radius.
@param criteria Termination criteria. See TermCriteria.
-@param stream
+@param stream Stream for the asynchronous version.
It maps each point of the source image into another point. As a result, you have a new color and new
position of each point.
@param sp Spatial window radius.
@param sr Color window radius.
@param criteria Termination criteria. See TermCriteria.
-@param stream
+@param stream Stream for the asynchronous version.
@sa cuda::meanShiftFiltering
*/
@param sr Color window radius.
@param minsize Minimum segment size. Smaller segments are merged.
@param criteria Termination criteria. See TermCriteria.
+@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void meanShiftSegmentation(InputArray src, OutputArray dst, int sp, int sr, int minsize,
- TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
+ Stream& stream = Stream::Null());
/////////////////////////// Match Template ////////////////////////////
namespace canny
{
- void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
- void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
+ void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream);
+ void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream);
- void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh);
+ void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh, cudaStream_t stream);
- void edgesHysteresisLocal(PtrStepSzi map, short2* st1);
+ void edgesHysteresisLocal(PtrStepSzi map, short2* st1, cudaStream_t stream);
- void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2);
+ void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, cudaStream_t stream);
- void getEdges(PtrStepSzi map, PtrStepSzb dst);
+ void getEdges(PtrStepSzi map, PtrStepSzb dst, cudaStream_t stream);
}
namespace
old_apperture_size_ = -1;
}
- void detect(InputArray image, OutputArray edges);
- void detect(InputArray dx, InputArray dy, OutputArray edges);
+ void detect(InputArray image, OutputArray edges, Stream& stream);
+ void detect(InputArray dx, InputArray dy, OutputArray edges, Stream& stream);
void setLowThreshold(double low_thresh) { low_thresh_ = low_thresh; }
double getLowThreshold() const { return low_thresh_; }
private:
void createBuf(Size image_size);
- void CannyCaller(GpuMat& edges);
+ void CannyCaller(GpuMat& edges, Stream& stream);
double low_thresh_;
double high_thresh_;
int old_apperture_size_;
};
- void CannyImpl::detect(InputArray _image, OutputArray _edges)
+ void CannyImpl::detect(InputArray _image, OutputArray _edges, Stream& stream)
{
GpuMat image = _image.getGpuMat();
image.locateROI(wholeSize, ofs);
GpuMat srcWhole(wholeSize, image.type(), image.datastart, image.step);
- canny::calcMagnitude(srcWhole, ofs.x, ofs.y, dx_, dy_, mag_, L2gradient_);
+ canny::calcMagnitude(srcWhole, ofs.x, ofs.y, dx_, dy_, mag_, L2gradient_, StreamAccessor::getStream(stream));
}
else
{
#ifndef HAVE_OPENCV_CUDAFILTERS
throw_no_cuda();
#else
- filterDX_->apply(image, dx_);
- filterDY_->apply(image, dy_);
+ filterDX_->apply(image, dx_, stream);
+ filterDY_->apply(image, dy_, stream);
- canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
+ canny::calcMagnitude(dx_, dy_, mag_, L2gradient_, StreamAccessor::getStream(stream));
#endif
}
- CannyCaller(edges);
+ CannyCaller(edges, stream);
}
- void CannyImpl::detect(InputArray _dx, InputArray _dy, OutputArray _edges)
+ void CannyImpl::detect(InputArray _dx, InputArray _dy, OutputArray _edges, Stream& stream)
{
GpuMat dx = _dx.getGpuMat();
GpuMat dy = _dy.getGpuMat();
CV_Assert( dy.type() == dx.type() && dy.size() == dx.size() );
CV_Assert( deviceSupports(SHARED_ATOMICS) );
- dx.copyTo(dx_);
- dy.copyTo(dy_);
+ dx.copyTo(dx_, stream);
+ dy.copyTo(dy_, stream);
if (low_thresh_ > high_thresh_)
std::swap(low_thresh_, high_thresh_);
_edges.create(dx.size(), CV_8UC1);
GpuMat edges = _edges.getGpuMat();
- canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
+ canny::calcMagnitude(dx_, dy_, mag_, L2gradient_, StreamAccessor::getStream(stream));
- CannyCaller(edges);
+ CannyCaller(edges, stream);
}
void CannyImpl::createBuf(Size image_size)
ensureSizeIsEnough(1, image_size.area(), CV_16SC2, st2_);
}
- void CannyImpl::CannyCaller(GpuMat& edges)
+ void CannyImpl::CannyCaller(GpuMat& edges, Stream& stream)
{
map_.setTo(Scalar::all(0));
- canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_));
+ canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_), StreamAccessor::getStream(stream));
- canny::edgesHysteresisLocal(map_, st1_.ptr<short2>());
+ canny::edgesHysteresisLocal(map_, st1_.ptr<short2>(), StreamAccessor::getStream(stream));
- canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>());
+ canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>(), StreamAccessor::getStream(stream));
- canny::getEdges(map_, edges);
+ canny::getEdges(map_, edges, StreamAccessor::getStream(stream));
}
}
mag(y, x) = norm(dxVal, dyVal);
}
- void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
+ void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
{
const dim3 block(16, 16);
const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y));
if (L2Grad)
{
L2 norm;
- calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
+ calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
}
else
{
L1 norm;
- calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
+ calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
}
cudaSafeCall( cudaGetLastError() );
- cudaSafeCall(cudaThreadSynchronize());
+ if (stream == NULL)
+ cudaSafeCall( cudaDeviceSynchronize() );
}
- void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
+ void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
{
if (L2Grad)
{
L2 norm;
- transform(dx, dy, mag, norm, WithOutMask(), 0);
+ transform(dx, dy, mag, norm, WithOutMask(), stream);
}
else
{
L1 norm;
- transform(dx, dy, mag, norm, WithOutMask(), 0);
+ transform(dx, dy, mag, norm, WithOutMask(), stream);
}
}
}
map(y, x) = edge_type;
}
- void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh)
+ void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh, cudaStream_t stream)
{
const dim3 block(16, 16);
const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y));
bindTexture(&tex_mag, mag);
- calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
+ calcMapKernel<<<grid, block, 0, stream>>>(dx, dy, map, low_thresh, high_thresh);
cudaSafeCall( cudaGetLastError() );
- cudaSafeCall( cudaDeviceSynchronize() );
+ if (stream == NULL)
+ cudaSafeCall( cudaDeviceSynchronize() );
}
}
}
}
- void edgesHysteresisLocal(PtrStepSzi map, short2* st1)
+ void edgesHysteresisLocal(PtrStepSzi map, short2* st1, cudaStream_t stream)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
- cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
+ cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
const dim3 block(16, 16);
const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));
- edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
+ edgesHysteresisLocalKernel<<<grid, block, 0, stream>>>(map, st1);
cudaSafeCall( cudaGetLastError() );
- cudaSafeCall( cudaDeviceSynchronize() );
+ if (stream == NULL)
+ cudaSafeCall( cudaDeviceSynchronize() );
}
}
}
}
- void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2)
+ void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, cudaStream_t stream)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
int count;
- cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
+ cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
+ cudaSafeCall( cudaStreamSynchronize(stream) );
while (count > 0)
{
- cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
+ cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
const dim3 block(128);
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
- edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
+ edgesHysteresisGlobalKernel<<<grid, block, 0, stream>>>(map, st1, st2, count);
cudaSafeCall( cudaGetLastError() );
- cudaSafeCall( cudaDeviceSynchronize() );
+ if (stream == NULL)
+ cudaSafeCall( cudaDeviceSynchronize() );
- cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
+ cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
+ cudaSafeCall( cudaStreamSynchronize(stream) );
count = min(count, map.cols * map.rows);
namespace canny
{
- void getEdges(PtrStepSzi map, PtrStepSzb dst)
+ void getEdges(PtrStepSzi map, PtrStepSzb dst, cudaStream_t stream)
{
- transform(map, dst, GetEdges(), WithOutMask(), 0);
+ transform(map, dst, GetEdges(), WithOutMask(), stream);
}
}
GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
int blockSize, bool useHarrisDetector, double harrisK);
- void detect(InputArray image, OutputArray corners, InputArray mask = noArray());
+ void detect(InputArray image, OutputArray corners, InputArray mask, Stream& stream);
private:
int maxCorners_;
cuda::createMinEigenValCorner(srcType, blockSize, 3);
}
- void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask)
+ void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream)
{
+ // TODO : implement async version
+ (void) stream;
+
using namespace cv::cuda::device::gfft;
GpuMat image = _image.getGpuMat();
cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::cuda::CLAHE>(); }
-void cv::cuda::evenLevels(OutputArray, int, int, int) { throw_no_cuda(); }
+void cv::cuda::evenLevels(OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::histEven(InputArray, OutputArray, InputOutputArray, int, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::histEven(InputArray, GpuMat*, InputOutputArray, int*, int*, int*, Stream&) { throw_no_cuda(); }
};
}
-void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel)
+void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel, Stream& stream)
{
const int kind = _levels.kind();
nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );
if (kind == _InputArray::CUDA_GPU_MAT)
- _levels.getGpuMatRef().upload(host_levels);
+ _levels.getGpuMatRef().upload(host_levels, stream);
}
namespace hist
public:
HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
- void detect(InputArray src, OutputArray circles);
+ void detect(InputArray src, OutputArray circles, Stream& stream);
void setDp(float dp) { dp_ = dp; }
float getDp() const { return dp_; }
filterDy_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
}
- void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
+ void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles, Stream& stream)
{
+ // TODO : implement async version
+ (void) stream;
+
using namespace cv::cuda::device::hough;
using namespace cv::cuda::device::hough_circles;
{
}
- void detect(InputArray src, OutputArray lines);
- void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
+ void detect(InputArray src, OutputArray lines, Stream& stream);
+ void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes, Stream& stream);
void setRho(float rho) { rho_ = rho; }
float getRho() const { return rho_; }
GpuMat result_;
};
- void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
+ void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines, Stream& stream)
{
+ // TODO : implement async version
+ (void) stream;
+
using namespace cv::cuda::device::hough;
using namespace cv::cuda::device::hough_lines;
result_.copyTo(lines);
}
- void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes)
+ void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes, Stream& stream)
{
GpuMat d_lines = _d_lines.getGpuMat();
CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 );
- d_lines.row(0).download(h_lines);
+ if (stream)
+ d_lines.row(0).download(h_lines, stream);
+ else
+ d_lines.row(0).download(h_lines);
if (h_votes.needed())
{
GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr<int>(1));
- d_votes.download(h_votes);
+ if (stream)
+ d_votes.download(h_votes, stream);
+ else
+ d_votes.download(h_votes);
}
}
}
{
}
- void detect(InputArray src, OutputArray lines);
+ void detect(InputArray src, OutputArray lines, Stream& stream);
void setRho(float rho) { rho_ = rho; }
float getRho() const { return rho_; }
GpuMat result_;
};
- void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines)
+ void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines, Stream& stream)
{
+ // TODO : implement async version
+ (void) stream;
+
using namespace cv::cuda::device::hough;
using namespace cv::cuda::device::hough_lines;
using namespace cv::cuda::device::hough_segments;
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-void cv::cuda::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria) { throw_no_cuda(); }
+void cv::cuda::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
#else
} // anonymous namespace
-void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria)
+void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria, Stream& stream)
{
GpuMat src = _src.getGpuMat();
// Perform mean shift procedure and obtain region and spatial maps
GpuMat d_rmap, d_spmap;
- cuda::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
+ cuda::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria, stream);
+
+ stream.waitForCompletion();
+
Mat rmap(d_rmap);
Mat spmap(d_spmap);