From 6d86d63ac59a9e581c0680c8a9580f75d9d98d4f Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Tue, 1 Jul 2014 10:44:52 -0700 Subject: [PATCH] Last of csbp load_constants() gone. --- modules/cudastereo/src/cuda/stereocsbp.cu | 30 +++++++++++------------------- modules/cudastereo/src/cuda/stereocsbp.hpp | 4 +--- modules/cudastereo/src/stereocsbp.cpp | 6 ++---- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index fc6f4f3..04f6cac 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -58,18 +58,10 @@ namespace cv { namespace cuda { namespace device /////////////////////// load constants //////////////////////// /////////////////////////////////////////////////////////////// - __constant__ float cdisc_single_jump; - __constant__ size_t cmsg_step; __constant__ size_t cdisp_step1; __constant__ size_t cdisp_step2; - - void load_constants(float disc_single_jump) - { - cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) ); - } - /////////////////////////////////////////////////////////////// /////////////////////// init data cost //////////////////////// /////////////////////////////////////////////////////////////// @@ -670,7 +662,7 @@ namespace cv { namespace cuda { namespace device template __device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3, - const T* dst_disp, const T* src_disp, int nr_plane, int max_disc_term, volatile T* temp) + const T* dst_disp, const T* src_disp, int nr_plane, int max_disc_term, float disc_single_jump, volatile T* temp) { T minimum = numeric_limits::max(); @@ -692,7 +684,7 @@ namespace cv { namespace cuda { namespace device T src_disp_reg = src_disp[d * cdisp_step1]; for(int d2 = 0; d2 < nr_plane; d2++) - cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + cdisc_single_jump * ::abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)); + cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + disc_single_jump * ::abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)); temp[d * cdisp_step1] = saturate_cast(cost_min); sum += cost_min; @@ -704,7 +696,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void compute_message(uchar *ctemp, T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i, int max_disc_term) + __global__ void compute_message(uchar *ctemp, T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i, int max_disc_term, float disc_single_jump) { int y = blockIdx.y * blockDim.y + threadIdx.y; int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + i) & 1); @@ -722,17 +714,17 @@ namespace cv { namespace cuda { namespace device T* temp = (T*)ctemp + y * cmsg_step + x; - message_per_pixel(data, u, r - 1, u + cmsg_step, l + 1, disp, disp - cmsg_step, nr_plane, max_disc_term, temp); - message_per_pixel(data, d, d - cmsg_step, r - 1, l + 1, disp, disp + cmsg_step, nr_plane, max_disc_term, temp); - message_per_pixel(data, l, u + cmsg_step, d - cmsg_step, l + 1, disp, disp - 1, nr_plane, max_disc_term, temp); - message_per_pixel(data, r, u + cmsg_step, d - cmsg_step, r - 1, disp, disp + 1, nr_plane, max_disc_term, temp); + message_per_pixel(data, u, r - 1, u + cmsg_step, l + 1, disp, disp - cmsg_step, nr_plane, max_disc_term, disc_single_jump, temp); + message_per_pixel(data, d, d - cmsg_step, r - 1, l + 1, disp, disp + cmsg_step, nr_plane, max_disc_term, disc_single_jump, temp); + message_per_pixel(data, l, u + cmsg_step, d - cmsg_step, l + 1, disp, disp - 1, nr_plane, max_disc_term, disc_single_jump, temp); + message_per_pixel(data, r, u + cmsg_step, d - cmsg_step, r - 1, disp, disp + 1, nr_plane, max_disc_term, disc_single_jump, temp); } } template void calc_all_iterations(uchar *ctemp, T* u, T* d, T* l, T* r, const T* data_cost_selected, - const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream) + const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, float disc_single_jump, cudaStream_t stream) { size_t disp_step = msg_step * h; cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); @@ -746,7 +738,7 @@ namespace cv { namespace cuda { namespace device for(int t = 0; t < iters; ++t) { - compute_message<<>>(ctemp, u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1, max_disc_term); + compute_message<<>>(ctemp, u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1, max_disc_term, disc_single_jump); cudaSafeCall( cudaGetLastError() ); } if (stream == 0) @@ -754,10 +746,10 @@ namespace cv { namespace cuda { namespace device }; template void calc_all_iterations(uchar *ctemp, short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step, - int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream); + int h, int w, int nr_plane, int iters, int max_disc_term, float disc_single_jump, cudaStream_t stream); template void calc_all_iterations(uchar *ctemp, float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step, - int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream); + int h, int w, int nr_plane, int iters, int max_disc_term, float disc_single_jump, cudaStream_t stream); /////////////////////////////////////////////////////////////// diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index 95c5a47..3054972 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -2,8 +2,6 @@ namespace cv { namespace cuda { namespace device { namespace stereocsbp { - void load_constants(float disc_single_jump); - template void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, float data_weight, float max_data_term, int min_disp, bool use_local_init_data_cost, cudaStream_t stream); @@ -22,7 +20,7 @@ namespace cv { namespace cuda { namespace device template void calc_all_iterations(uchar *ctemp, T* u, T* d, T* l, T* r, const T* data_cost_selected, - const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, cudaStream_t stream); + const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, int max_disc_term, float disc_single_jump, cudaStream_t stream); template void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step, diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index 2515061..ded5fa2 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -222,8 +222,6 @@ namespace //////////////////////////////////////////////////////////////////////////// // Compute - load_constants(disc_single_jump_); - l[0].setTo(0, _stream); d[0].setTo(0, _stream); r[0].setTo(0, _stream); @@ -267,7 +265,7 @@ namespace calc_all_iterations(temp_.ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, - rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, stream); + rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, disc_single_jump_, stream); } } else @@ -298,7 +296,7 @@ namespace calc_all_iterations(temp_.ptr(), u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, - rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, stream); + rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, disc_single_jump_, stream); } } -- 2.7.4