From 87f3451ec63a4c24d377c8821802ef0247e75d5e Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Mon, 17 Oct 2011 06:44:40 +0000
Subject: [PATCH] fixed warnings

---
 modules/gpu/src/cuda/bf_knnmatch.cu     | 45 ++++++++++++++++++++++-----------
 modules/gpu/src/cuda/bf_match.cu        | 45 ++++++++++++++++++++++-----------
 modules/gpu/src/cuda/bf_radius_match.cu | 35 +++++++++++++++----------
 3 files changed, 81 insertions(+), 44 deletions(-)

diff --git a/modules/gpu/src/cuda/bf_knnmatch.cu b/modules/gpu/src/cuda/bf_knnmatch.cu
index fb7004e..f53af9e 100644
--- a/modules/gpu/src/cuda/bf_knnmatch.cu
+++ b/modules/gpu/src/cuda/bf_knnmatch.cu
@@ -43,6 +43,7 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/vec_distance.hpp"
+#include "opencv2/gpu/device/datamov_utils.hpp"
 
 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -235,7 +236,15 @@ namespace cv { namespace gpu { namespace bf_knnmatch
             {
                 const int loadX = threadIdx.x + i * BLOCK_SIZE;
 
-                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = loadX < train.cols ? train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX] : 0;
+                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+
+                if (loadX < train.cols)
+                {
+                    T val;
+
+                    ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
+                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
+                }
 
                 __syncthreads();
 
@@ -402,15 +411,18 @@ namespace cv { namespace gpu { namespace bf_knnmatch
             {
                 const int loadX = threadIdx.x + i * BLOCK_SIZE;
 
+                s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
+                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+
                 if (loadX < query.cols)
                 {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(min(queryIdx, query.rows - 1))[loadX];
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
-                }
-                else
-                {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+                    T val;
+
+                    ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
+                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
+
+                    ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
+                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
                 }
 
                 __syncthreads();
@@ -573,15 +585,18 @@ namespace cv { namespace gpu { namespace bf_knnmatch
             {
                 const int loadX = threadIdx.x + i * BLOCK_SIZE;
 
+                s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
+                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+
                 if (loadX < query.cols)
                 {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(min(queryIdx, query.rows - 1))[loadX];
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
-                }
-                else
-                {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+                    T val;
+
+                    ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
+                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
+
+                    ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
+                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
                 }
 
                 __syncthreads();
diff --git a/modules/gpu/src/cuda/bf_match.cu b/modules/gpu/src/cuda/bf_match.cu
index 2c7f74a..e46939f 100644
--- a/modules/gpu/src/cuda/bf_match.cu
+++ b/modules/gpu/src/cuda/bf_match.cu
@@ -43,6 +43,7 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/vec_distance.hpp"
+#include "opencv2/gpu/device/datamov_utils.hpp"
 
 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -110,7 +111,15 @@ namespace cv { namespace gpu { namespace bf_match
             {
                 const int loadX = threadIdx.x + i * BLOCK_SIZE;
 
-                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = loadX < train.cols ? train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX] : 0;
+                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+
+                if (loadX < train.cols)
+                {
+                    T val;
+
+                    ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
+                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
+                }
 
                 __syncthreads();
 
@@ -258,15 +267,18 @@ namespace cv { namespace gpu { namespace bf_match
             {
                 const int loadX = threadIdx.x + i * BLOCK_SIZE;
 
+                s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
+                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+
                 if (loadX < query.cols)
                 {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(min(queryIdx, query.rows - 1))[loadX];
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
-                }
-                else
-                {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+                    T val;
+
+                    ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
+                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
+
+                    ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
+                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
                 }
 
                 __syncthreads();
@@ -410,15 +422,18 @@ namespace cv { namespace gpu { namespace bf_match
             {
                 const int loadX = threadIdx.x + i * BLOCK_SIZE;
 
+                s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
+                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+
                 if (loadX < query.cols)
                 {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(min(queryIdx, query.rows - 1))[loadX];
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
-                }
-                else
-                {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+                    T val;
+
+                    ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
+                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
+
+                    ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
+                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
                 }
 
                 __syncthreads();
diff --git a/modules/gpu/src/cuda/bf_radius_match.cu b/modules/gpu/src/cuda/bf_radius_match.cu
index 4cef2bc..e350075 100644
--- a/modules/gpu/src/cuda/bf_radius_match.cu
+++ b/modules/gpu/src/cuda/bf_radius_match.cu
@@ -43,6 +43,7 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/vec_distance.hpp"
+#include "opencv2/gpu/device/datamov_utils.hpp"
 
 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -73,15 +74,18 @@ namespace cv { namespace gpu { namespace bf_radius_match
             {
                 const int loadX = threadIdx.x + i * BLOCK_SIZE;
 
+                s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
+                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+
                 if (loadX < query.cols)
                 {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(min(queryIdx, query.rows - 1))[loadX];
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
-                }
-                else
-                {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+                    T val;
+
+                    ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
+                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
+
+                    ForceGlob<T>::Load(train.ptr(min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
+                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
                 }
 
                 __syncthreads();
@@ -181,15 +185,18 @@ namespace cv { namespace gpu { namespace bf_radius_match
             {
                 const int loadX = threadIdx.x + i * BLOCK_SIZE;
 
+                s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
+                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+
                 if (loadX < query.cols)
                 {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(min(queryIdx, query.rows - 1))[loadX];
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
-                }
-                else
-                {
-                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
-                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
+                    T val;
+
+                    ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
+                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
+
+                    ForceGlob<T>::Load(train.ptr(min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
+                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
                 }
 
                 __syncthreads();
-- 
2.7.4
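Every hunk above applies the same refactor: zero the shared-memory slot unconditionally, then overwrite it with a bounds-guarded load routed through ForceGlob<T>::Load from datamov_utils.hpp instead of indexing the row pointer inside a conditional expression. Below is a minimal sketch of that pattern for reference; the ForceGlob stand-in only mirrors the Load(ptr, offset, val) call shape visible in the diff (the real helper in opencv2/gpu/device/datamov_utils.hpp may emit a forced global-memory load), and the names loadTrainTile, trainRow and cols are illustrative, not OpenCV API.

// Stand-in with the Load(ptr, offset, val) shape seen at the call sites above;
// not the OpenCV implementation.
template <typename T> struct ForceGlob
{
    static __device__ __forceinline__ void Load(const T* ptr, int offset, T& val)
    {
        val = ptr[offset];
    }
};

// Guarded tile load as used throughout the patch (illustrative names).
template <int BLOCK_SIZE, typename T>
__device__ void loadTrainTile(const T* trainRow, int cols, int i, T* s_train)
{
    const int loadX = threadIdx.x + i * BLOCK_SIZE;

    // Every thread writes its shared-memory slot exactly once up front...
    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;

    // ...and overwrites it only when the column index is in range.
    if (loadX < cols)
    {
        T val;
        ForceGlob<T>::Load(trainRow, loadX, val);
        s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
    }
}

Compared with the original ternary (cond ? row[loadX] : 0), this form never evaluates the out-of-range index expression and keeps the global read in one explicit place, which is presumably what silenced the compiler warnings referenced in the subject line.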