void cv::gpu::flip(const GpuMat&, GpuMat&, int) { throw_nogpu(); }\r
Scalar cv::gpu::sum(const GpuMat&) { throw_nogpu(); return Scalar(); }\r
void cv::gpu::minMax(const GpuMat&, double*, double*) { throw_nogpu(); }\r
+void cv::gou::minMax(const GpuMat&, double*, double*, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*) { throw_nogpu(); }\r
void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::exp(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
\r
namespace cv { namespace gpu { namespace mathfunc { namespace minmax {\r
\r
- void get_buf_size_required(int elem_size, int& b1cols, int& b1rows, \r
- int& b2cols, int& b2rows);\r
+ void get_buf_size_required(int elem_size, int& cols, int& rows);\r
\r
template <typename T> \r
- void min_max_caller(const DevMem2D src, double* minval, double* maxval, \r
- unsigned char* minval_buf, unsigned char* maxval_buf);\r
+ void min_max_caller(const DevMem2D src, double* minval, double* maxval, PtrStep buf);\r
\r
template <typename T> \r
- void min_max_caller_2steps(const DevMem2D src, double* minval, double* maxval, \r
- unsigned char* minval_buf, unsigned char* maxval_buf);\r
+ void min_max_caller_2steps(const DevMem2D src, double* minval, double* maxval, PtrStep buf);\r
\r
}}}}\r
\r
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)\r
{\r
+ GpuMat buf;\r
+ minMax(src, minVal, maxVal, buf);\r
+}\r
+\r
+\r
+void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, GpuMat& buf)\r
+{\r
using namespace mathfunc::minmax;\r
\r
double maxVal_;\r
if (!maxVal) maxVal = &maxVal_;\r
\r
GpuMat src_ = src.reshape(1);\r
-\r
- // Allocate GPU buffers\r
- Size b1size, b2size;\r
- get_buf_size_required(src.elemSize(), b1size.width, b1size.height, b2size.width, b2size.height);\r
- GpuMat b1(b1size, CV_8U), b2(b2size, CV_8U);\r
+ \r
+ Size bufSize;\r
+ get_buf_size_required(src.elemSize(), bufSize.width, bufSize.height);\r
+ buf.create(bufSize, CV_8U);\r
\r
int major, minor;\r
getComputeCapability(getDevice(), major, minor);\r
- \r
+ \r
if (major >= 1 && minor >= 1)\r
{\r
switch (src_.type())\r
{\r
- case CV_8U: min_max_caller<unsigned char>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_8S: min_max_caller<signed char>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_16U: min_max_caller<unsigned short>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_16S: min_max_caller<signed short>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_32S: min_max_caller<int>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_32F: min_max_caller<float>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_64F: min_max_caller<double>(src_, minVal, maxVal, b1.data, b2.data); break;\r
+ case CV_8U: min_max_caller<unsigned char>(src_, minVal, maxVal, buf); break;\r
+ case CV_8S: min_max_caller<signed char>(src_, minVal, maxVal, buf); break;\r
+ case CV_16U: min_max_caller<unsigned short>(src_, minVal, maxVal, buf); break;\r
+ case CV_16S: min_max_caller<signed short>(src_, minVal, maxVal, buf); break;\r
+ case CV_32S: min_max_caller<int>(src_, minVal, maxVal, buf); break;\r
+ case CV_32F: min_max_caller<float>(src_, minVal, maxVal, buf); break;\r
+ case CV_64F: min_max_caller<double>(src_, minVal, maxVal, buf); break;\r
default: CV_Error(CV_StsBadArg, "Unsupported type");\r
}\r
}\r
{\r
switch (src_.type())\r
{\r
- case CV_8U: min_max_caller_2steps<unsigned char>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_8S: min_max_caller_2steps<signed char>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_16U: min_max_caller_2steps<unsigned short>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_16S: min_max_caller_2steps<signed short>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_32S: min_max_caller_2steps<int>(src_, minVal, maxVal, b1.data, b2.data); break;\r
- case CV_32F: min_max_caller_2steps<float>(src_, minVal, maxVal, b1.data, b2.data); break;\r
+ case CV_8U: min_max_caller_2steps<unsigned char>(src_, minVal, maxVal, buf); break;\r
+ case CV_8S: min_max_caller_2steps<signed char>(src_, minVal, maxVal, buf); break;\r
+ case CV_16U: min_max_caller_2steps<unsigned short>(src_, minVal, maxVal, buf); break;\r
+ case CV_16S: min_max_caller_2steps<signed short>(src_, minVal, maxVal, buf); break;\r
+ case CV_32S: min_max_caller_2steps<int>(src_, minVal, maxVal, buf); break;\r
+ case CV_32F: min_max_caller_2steps<float>(src_, minVal, maxVal, buf); break;\r
default: CV_Error(CV_StsBadArg, "Unsupported type");\r
}\r
}\r
//////////////////////////////////////////////////////////////////////////////\r
// Min max\r
\r
    // To avoid shared memory bank conflicts we convert each value into a value of
    // appropriate type (32 bits minimum)
    // Maps an input element type to the (>= 32-bit) type used during the reduction.
    // NOTE(review): only the unsigned char specialization is visible in this chunk;
    // specializations for the other supported depths are presumably defined in the
    // elided lines — confirm against the full file.
    template <typename T> struct MinMaxTypeTraits {};
    template <> struct MinMaxTypeTraits<unsigned char> { typedef int best_type; };

    // Host-side zero constant, copied to device symbols to reset counters.
    static const unsigned int czero = 0;
\r
+ // Global counter of blocks finished its work\r
+ __device__ unsigned int blocks_finished;\r
+\r
+\r
    // Estimates good thread configuration
    // - threads variable satisfies to threads.x * threads.y == 256
    // NOTE(review): body is truncated in this chunk — the opening brace and the
    // assignment to 'threads' are elided; only the grid setup is visible.
    void estimate_thread_cfg(dim3& threads, dim3& grid)
        grid = dim3(6, 5);
    }
\r
+\r
// Returns required buffer sizes\r
- void get_buf_size_required(int elem_size, int& b1cols, int& b1rows, int& b2cols, int& b2rows)\r
+ void get_buf_size_required(int elem_size, int& cols, int& rows)\r
{\r
dim3 threads, grid;\r
estimate_thread_cfg(threads, grid);\r
- b1cols = grid.x * grid.y * elem_size; b1rows = 1;\r
- b2cols = grid.x * grid.y * elem_size; b2rows = 1;\r
+ cols = grid.x * grid.y * elem_size; \r
+ rows = 2;\r
}\r
\r
+\r
    // Estimates device constants which are used in the kernels using specified thread configuration
    // NOTE(review): body is truncated in this chunk — 'theight' is computed in
    // elided lines (a ctwidth/twidth counterpart is presumably elided as well).
    void estimate_kernel_consts(int cols, int rows, const dim3& threads, const dim3& grid)
    { 
        cudaSafeCall(cudaMemcpyToSymbol(ctheight, &theight, sizeof(ctheight))); 
    } 
\r
+\r
    // Does min and max in shared memory
    // NOTE(review): body is truncated in this chunk — the opening brace and the
    // matching minval[tid] update are elided; only the max half is visible.
    template <typename T>
    __device__ void merge(unsigned int tid, unsigned int offset, volatile T* minval, volatile T* maxval)
        maxval[tid] = max(maxval[tid], maxval[tid + offset]);
    }
\r
    // NOTE(review): the two '-' lines below are removed by this change —
    // blocks_finished was relocated above estimate_thread_cfg.
-    // Global counter of blocks finished its work
-    __device__ unsigned int blocks_finished;

    // NOTE(review): min_max_kernel's body is elided in this chunk; only the
    // signature and the tail of a preprocessor-guarded section are visible.
    template <int nthreads, typename T>
    __global__ void min_max_kernel(int cols, int rows, const PtrStep src, T* minval, T* maxval)
#endif
    }
\r
- // This kernel will be used only when compute capability is 1.0\r
- template <typename T>\r
- __global__ void min_max_kernel_2ndstep(T* minval, T* maxval, int size)\r
- {\r
- T val;\r
- T mymin = numeric_limits_gpu<T>::max();\r
- T mymax = numeric_limits_gpu<T>::min();\r
- for (unsigned int i = 0; i < size; ++i)\r
- { \r
- val = minval[i]; if (val < mymin) mymin = val;\r
- val = maxval[i]; if (val > mymax) mymax = val;\r
- }\r
- minval[0] = mymin;\r
- maxval[0] = mymax;\r
- }\r
\r
template <typename T>\r
- void min_max_caller(const DevMem2D src, double* minval, double* maxval, \r
- unsigned char* minval_buf, unsigned char* maxval_buf)\r
+ void min_max_caller(const DevMem2D src, double* minval, double* maxval, PtrStep buf)\r
{\r
dim3 threads, grid;\r
estimate_thread_cfg(threads, grid);\r
estimate_kernel_consts(src.cols, src.rows, threads, grid);\r
\r
- cudaSafeCall(cudaMemcpyToSymbol(blocks_finished, &czero, sizeof(blocks_finished)));\r
- min_max_kernel<256, T><<<grid, threads>>>(src.cols, src.rows, src, (T*)minval_buf, (T*)maxval_buf);\r
+ T* minval_buf = (T*)buf.ptr(0);\r
+ T* maxval_buf = (T*)buf.ptr(1);\r
\r
+ cudaSafeCall(cudaMemcpyToSymbol(blocks_finished, &czero, sizeof(blocks_finished)));\r
+ min_max_kernel<256, T><<<grid, threads>>>(src.cols, src.rows, src, minval_buf, maxval_buf);\r
cudaSafeCall(cudaThreadSynchronize());\r
\r
T minval_, maxval_;\r
cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost));\r
*minval = minval_;\r
*maxval = maxval_;\r
+ } \r
+\r
+ template void min_max_caller<unsigned char>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller<signed char>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller<unsigned short>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller<signed short>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller<int>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller<float>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller<double>(const DevMem2D, double*, double*, PtrStep);\r
+\r
+\r
+ // This kernel will be used only when compute capability is 1.0\r
+ template <typename T>\r
+ __global__ void min_max_kernel_2ndstep(T* minval, T* maxval, int size)\r
+ {\r
+ T val;\r
+ T mymin = numeric_limits_gpu<T>::max();\r
+ T mymax = numeric_limits_gpu<T>::min();\r
+ for (unsigned int i = 0; i < size; ++i)\r
+ { \r
+ val = minval[i]; if (val < mymin) mymin = val;\r
+ val = maxval[i]; if (val > mymax) mymax = val;\r
+ }\r
+ minval[0] = mymin;\r
+ maxval[0] = mymax;\r
}\r
\r
+\r
template <typename T>\r
- void min_max_caller_2steps(const DevMem2D src, double* minval, double* maxval, \r
- unsigned char* minval_buf, unsigned char* maxval_buf)\r
+ void min_max_caller_2steps(const DevMem2D src, double* minval, double* maxval, PtrStep buf)\r
{\r
dim3 threads, grid;\r
estimate_thread_cfg(threads, grid);\r
estimate_kernel_consts(src.cols, src.rows, threads, grid);\r
\r
+ T* minval_buf = (T*)buf.ptr(0);\r
+ T* maxval_buf = (T*)buf.ptr(1);\r
+\r
cudaSafeCall(cudaMemcpyToSymbol(blocks_finished, &czero, sizeof(blocks_finished)));\r
- min_max_kernel<256, T><<<grid, threads>>>(src.cols, src.rows, src, (T*)minval_buf, (T*)maxval_buf);\r
- min_max_kernel_2ndstep<T><<<1, 1>>>((T*)minval_buf, (T*)maxval_buf, grid.x * grid.y);\r
+ min_max_kernel<256, T><<<grid, threads>>>(src.cols, src.rows, src, minval_buf, maxval_buf);\r
+ min_max_kernel_2ndstep<T><<<1, 1>>>(minval_buf, maxval_buf, grid.x * grid.y);\r
cudaSafeCall(cudaThreadSynchronize());\r
\r
T minval_, maxval_;\r
*maxval = maxval_;\r
}\r
\r
- template void min_max_caller<unsigned char>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller<signed char>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller<unsigned short>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller<signed short>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller<int>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller<float>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller<double>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
-\r
- template void min_max_caller_2steps<unsigned char>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller_2steps<signed char>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller_2steps<unsigned short>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller_2steps<signed short>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller_2steps<int>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
- template void min_max_caller_2steps<float>(const DevMem2D, double*, double*, unsigned char*, unsigned char*);\r
+ template void min_max_caller_2steps<unsigned char>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller_2steps<signed char>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller_2steps<unsigned short>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller_2steps<signed short>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller_2steps<int>(const DevMem2D, double*, double*, PtrStep);\r
+ template void min_max_caller_2steps<float>(const DevMem2D, double*, double*, PtrStep);\r
\r
} // namespace minmax\r
\r
+\r
namespace minmaxloc {\r
\r
template <typename T, int op> struct OptLoc {};\r