// Does mean shift filtering on GPU.\r
CV_EXPORTS void meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));\r
\r
+ CV_EXPORTS void colorizeDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp);\r
+\r
//////////////////////////////// StereoBM_GPU ////////////////////////////////\r
\r
class CV_EXPORTS StereoBM_GPU\r
//! Acync version\r
void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream);\r
\r
+ private:\r
int ndisp;\r
int radius;\r
int iters;\r
float edge_threshold;\r
float max_disc_threshold;\r
float sigma_range;\r
- private:\r
- std::vector<float> table_color;\r
+\r
+ GpuMat table_color;\r
GpuMat table_space;\r
};\r
}\r
\r
namespace cv { namespace gpu { namespace bf \r
{\r
- void calc_space_weighted_filter_gpu(const DevMem2Df& table_space, int half, float dist_space, cudaStream_t stream);\r
void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc);\r
\r
void bilateral_filter_gpu(const DevMem2D& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream);\r
const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f;\r
const float DEFAULT_SIGMA_RANGE = 10.0f;\r
\r
- inline void calc_color_weighted_table(vector<float>& table_color, float sigma_range, int len)\r
+ inline void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len)\r
{\r
- float* color_table_x;\r
- \r
- table_color.resize(len);\r
- color_table_x = &table_color[0];\r
+ Mat cpu_table_color(1, len, CV_32F);\r
\r
- for(int y = 0; y < len; y++) \r
- table_color[y] = static_cast<float>(std::exp(-double(y * y) / (2 * sigma_range * sigma_range)));\r
+ float* line = cpu_table_color.ptr<float>();\r
+\r
+ for(int i = 0; i < len; i++) \r
+ line[i] = static_cast<float>(std::exp(-double(i * i) / (2 * sigma_range * sigma_range)));\r
+\r
+ table_color.upload(cpu_table_color);\r
}\r
\r
- inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space, cudaStream_t stream)\r
+ inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space)\r
{\r
int half = (win_size >> 1);\r
- table_space.create(half + 1, half + 1, CV_32F);\r
\r
- bf::calc_space_weighted_filter_gpu(table_space, half, dist_space, stream);\r
+ Mat cpu_table_space(half + 1, half + 1, CV_32F);\r
+\r
+ for (int y = 0; y <= half; ++y)\r
+ {\r
+ float* row = cpu_table_space.ptr<float>(y);\r
+ for (int x = 0; x <= half; ++x)\r
+ row[x] = exp(-sqrt(float(y * y) + float(x * x)) / dist_space);\r
+ }\r
+\r
+ table_space.upload(cpu_table_space);\r
}\r
\r
template <typename T>\r
- void bilateral_filter_operator(DisparityBilateralFilter& rthis, vector<float>& table_color, GpuMat& table_space, \r
+ void bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold,float max_disc_threshold, \r
+ GpuMat& table_color, GpuMat& table_space, \r
const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream)\r
{\r
- calc_color_weighted_table(table_color, rthis.sigma_range, 255);\r
- calc_space_weighted_filter(table_space, rthis.radius * 2 + 1, rthis.radius + 1.0f, stream);\r
-\r
- short edge_disc = max<short>(short(1), short(rthis.ndisp * rthis.edge_threshold + 0.5));\r
- short max_disc = short(rthis.ndisp * rthis.max_disc_threshold + 0.5);\r
+ short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));\r
+ short max_disc = short(ndisp * max_disc_threshold + 0.5);\r
\r
- float* table_color_dev;\r
- cudaSafeCall( cudaMalloc((void**)&table_color_dev, table_color.size() * sizeof(float)) );\r
- cudaSafeCall( cudaMemcpy(table_color_dev, &table_color[0], table_color.size() * sizeof(float), cudaMemcpyHostToDevice) );\r
- bf::load_constants(table_color_dev, table_space, rthis.ndisp, rthis.radius, edge_disc, max_disc);\r
+ bf::load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);\r
\r
if (&dst != &disp)\r
disp.copyTo(dst);\r
\r
- bf::bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), rthis.iters, stream);\r
-\r
- cudaSafeCall( cudaFree(table_color_dev) );\r
+ bf::bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, stream);\r
}\r
\r
- typedef void (*bilateral_filter_operator_t)(DisparityBilateralFilter& rthis, vector<float>& table_color, GpuMat& table_space, \r
- const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream);\r
+ typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, \r
+ GpuMat& table_color, GpuMat& table_space, \r
+ const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream);\r
\r
const bilateral_filter_operator_t operators[] = \r
{bilateral_filter_operator<unsigned char>, 0, 0, bilateral_filter_operator<short>, 0, 0, 0, 0};\r
: ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(DEFAULT_EDGE_THRESHOLD), max_disc_threshold(DEFAULT_MAX_DISC_THRESHOLD),\r
sigma_range(DEFAULT_SIGMA_RANGE)\r
{\r
+ calc_color_weighted_table(table_color, sigma_range, 255);\r
+ calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);\r
}\r
\r
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_, float edge_threshold_, \r
: ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(edge_threshold_), max_disc_threshold(max_disc_threshold_), \r
sigma_range(sigma_range_)\r
{\r
+ calc_color_weighted_table(table_color, sigma_range, 255);\r
+ calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);\r
}\r
\r
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst)\r
{\r
CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);\r
CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));\r
- operators[disp.type()](*this, table_color, table_space, disp, img, dst, 0);\r
+ operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, 0);\r
}\r
\r
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)\r
{\r
CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);\r
CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));\r
- operators[disp.type()](*this, table_color, table_space, disp, img, dst, StreamAccessor::getStream(stream));\r
+ operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, StreamAccessor::getStream(stream));\r
}\r
\r
#endif /* !defined (HAVE_CUDA) */\r
\r
namespace bf_krnls\r
{\r
- __global__ void calc_space_weighted_filter(float* table_space, size_t step, int half, float dist_space)\r
- {\r
- int x = blockIdx.x * blockDim.x + threadIdx.x;\r
- int y = blockIdx.y * blockDim.y + threadIdx.y;\r
-\r
- if (y <= half && x <= half)\r
- *(table_space + y * step + x) = expf(-sqrtf(float(y * y) + float(x * x)) / dist_space);\r
- }\r
-}\r
-\r
-namespace cv { namespace gpu { namespace bf \r
-{\r
- void calc_space_weighted_filter_gpu(const DevMem2Df& table_space, int half, float dist_space, cudaStream_t stream)\r
- {\r
- dim3 threads(32, 8, 1);\r
- dim3 grid(1, 1, 1);\r
- grid.x = divUp(half + 1, threads.x);\r
- grid.x = divUp(half + 1, threads.y);\r
-\r
- bf_krnls::calc_space_weighted_filter<<<grid, threads, 0, stream>>>(table_space.ptr, table_space.step/sizeof(float), half, dist_space);\r
-\r
- if (stream != 0)\r
- cudaSafeCall( cudaThreadSynchronize() );\r
- }\r
-}}}\r
-\r
-namespace bf_krnls\r
-{\r
__constant__ float* ctable_color;\r
__constant__ float* ctable_space;\r
__constant__ size_t ctable_space_step;\r
}\r
}}}\r
\r
+/////////////////////////////////// colorizeDisp ///////////////////////////////////////////////\r
\r
+namespace imgproc\r
+{\r
+ template <typename T>\r
+ __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)\r
+ { \r
+ unsigned int H = ((ndisp-d) * 240)/ndisp;\r
+\r
+ unsigned int hi = (H/60) % 6;\r
+ float f = H/60.f - H/60;\r
+ float p = V * (1 - S);\r
+ float q = V * (1 - f * S);\r
+ float t = V * (1 - (1 - f) * S);\r
+\r
+ float3 res;\r
+ \r
+ if (hi == 0) //R = V, G = t, B = p\r
+ {\r
+ res.x = p;\r
+ res.y = t;\r
+ res.z = V;\r
+ }\r
+\r
+ if (hi == 1) // R = q, G = V, B = p\r
+ {\r
+ res.x = p;\r
+ res.y = V;\r
+ res.z = q;\r
+ } \r
+ \r
+ if (hi == 2) // R = p, G = V, B = t\r
+ {\r
+ res.x = t;\r
+ res.y = V;\r
+ res.z = p;\r
+ }\r
+ \r
+ if (hi == 3) // R = p, G = q, B = V\r
+ {\r
+ res.x = V;\r
+ res.y = q;\r
+ res.z = p;\r
+ }\r
+\r
+ if (hi == 4) // R = t, G = p, B = V\r
+ {\r
+ res.x = V;\r
+ res.y = p;\r
+ res.z = t;\r
+ }\r
+\r
+ if (hi == 5) // R = V, G = p, B = q\r
+ {\r
+ res.x = q;\r
+ res.y = p;\r
+ res.z = V;\r
+ }\r
+ unsigned int b = (unsigned int)(max(0.f, min (res.x, 1.f)) * 255.f);\r
+ unsigned int g = (unsigned int)(max(0.f, min (res.y, 1.f)) * 255.f);\r
+ unsigned int r = (unsigned int)(max(0.f, min (res.z, 1.f)) * 255.f);\r
+\r
+ return (r << 16) + (g << 8) + b; \r
+ } \r
+\r
+ __global__ void colorizeDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)\r
+ {\r
+ const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;\r
+ const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
+\r
+ if(x < width && y < height) \r
+ {\r
+ uchar4 d4 = *(uchar4*)(disp + y * disp_step + x);\r
+\r
+ uint4 res;\r
+ res.x = cvtPixel(d4.x, ndisp);\r
+ res.y = cvtPixel(d4.y, ndisp);\r
+ res.z = cvtPixel(d4.z, ndisp);\r
+ res.w = cvtPixel(d4.w, ndisp);\r
+ \r
+ uint4* line = (uint4*)(out_image + y * out_step);\r
+ line[x >> 2] = res;\r
+ }\r
+ }\r
+\r
+ __global__ void colorizeDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)\r
+ {\r
+ const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;\r
+ const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
+\r
+ if(x < width && y < height) \r
+ {\r
+ short2 d2 = *(short2*)(disp + y * disp_step + x);\r
+\r
+ uint2 res;\r
+ res.x = cvtPixel(d2.x, ndisp); \r
+ res.y = cvtPixel(d2.y, ndisp);\r
+\r
+ uint2* line = (uint2*)(out_image + y * out_step);\r
+ line[x >> 1] = res;\r
+ }\r
+ }\r
+}\r
+\r
+namespace cv { namespace gpu { namespace impl \r
+{\r
+ void colorizeDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp)\r
+ {\r
+ dim3 threads(16, 16, 1);\r
+ dim3 grid(1, 1, 1);\r
+ grid.x = divUp(src.cols, threads.x << 2);\r
+ grid.y = divUp(src.rows, threads.y);\r
+ \r
+ imgproc::colorizeDisp<<<grid, threads>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);\r
+ cudaThreadSynchronize(); \r
+ }\r
+\r
+ void colorizeDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp)\r
+ {\r
+ dim3 threads(32, 8, 1);\r
+ dim3 grid(1, 1, 1);\r
+ grid.x = divUp(src.cols, threads.x << 1);\r
+ grid.y = divUp(src.rows, threads.y);\r
+ \r
+ imgproc::colorizeDisp<<<grid, threads>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);\r
+ cudaThreadSynchronize();\r
+ }\r
+}}}\r
\r
#if !defined (HAVE_CUDA)\r
\r
-void cv::gpu::remap(const GpuMat& /*src*/, const GpuMat& /*xmap*/, const GpuMat& /*ymap*/, GpuMat& /*dst*/) { throw_nogpu(); }\r
+void cv::gpu::remap(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::meanShiftFiltering_GPU(const GpuMat&, GpuMat&, int, int, TermCriteria ) { throw_nogpu(); }\r
+void cv::gpu::colorizeDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); }\r
\r
#else /* !defined (HAVE_CUDA) */\r
\r
extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst);\r
\r
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);\r
+\r
+ void colorizeDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp);\r
+ void colorizeDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp);\r
}\r
}}\r
\r
impl::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps); \r
}\r
\r
+namespace\r
+{\r
+ template <typename T>\r
+ void colorizeDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp)\r
+ {\r
+ impl::colorizeDisp_gpu((DevMem2D_<T>)src, dst, ndisp);\r
+ }\r
+}\r
+\r
+void cv::gpu::colorizeDisp(const GpuMat& src, GpuMat& dst, int ndisp)\r
+{\r
+ typedef void (*colorizeDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp);\r
+\r
+ static const colorizeDisp_caller_t callers[] = {colorizeDisp_caller<uchar>, 0, 0, colorizeDisp_caller<short>, 0, 0, 0, 0};\r
+ CV_Assert(src.type() == CV_8U || src.type() == CV_16S);\r
+\r
+ GpuMat out;\r
+ if (&dst != &src)\r
+ out = dst;\r
+ out.create(src.size(), CV_8UC4);\r
+ \r
+ callers[src.type()](src, out, ndisp);\r
+ dst = out;\r
+}\r
\r
#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file