smem[tid] = res.x;\r
smem[tid + nthreads] = res.y;\r
smem[tid + 2 * nthreads] = res.z;\r
- smem[tid + 3 * nthreads] = res.z;\r
+ smem[tid + 3 * nthreads] = res.w;\r
__syncthreads();\r
\r
sumInSmem<nthreads, R>(smem, tid);\r
src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));\r
sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(\r
(typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);\r
+ break;\r
case 2:\r
sumKernel_C2<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(\r
src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));\r
sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(\r
(typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);\r
+ break;\r
case 3:\r
sumKernel_C3<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(\r
src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));\r
sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(\r
(typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);\r
+ break;\r
case 4:\r
sumKernel_C4<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(\r
src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));\r
sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(\r
(typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);\r
+ break;\r
}\r
cudaSafeCall(cudaThreadSynchronize());\r
\r
\r
// Returns the result of ::compareToSet(CUDA_ARCH_FEATURES, feature, std::greater_equal<int>()).\r
//\r
// The replaced body special-cased NATIVE_DOUBLE (compared against 13) and ATOMICS\r
// (compared against 11) and returned true for every other feature; the new body\r
// passes the enumerator itself as the integer operand, so every feature value now\r
// goes through the comparison.\r
//\r
// NOTE(review): this presumably relies on each GpuFeature enumerator being numerically\r
// equal to the architecture number it requires (e.g. 11, 13, 20) - confirm against the\r
// enum declaration, which is not visible here.\r
CV_EXPORTS bool cv::gpu::TargetArchs::builtWith(cv::gpu::GpuFeature feature)\r
{\r
- if (feature == NATIVE_DOUBLE)\r
- return ::compareToSet(CUDA_ARCH_FEATURES, 13, std::greater_equal<int>());\r
- if (feature == ATOMICS)\r
- return ::compareToSet(CUDA_ARCH_FEATURES, 11, std::greater_equal<int>());\r
- return true;\r
+ return ::compareToSet(CUDA_ARCH_FEATURES, feature, std::greater_equal<int>());\r
}\r
\r
\r
// sum\r
//\r
\r
+ gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 1), src);\r
+ a = sum(src);\r
+ b = sum(GpuMat(src));\r
+ if (abs(a[0] - b[0]) > src.size().area() * max_err)\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "1 cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[0], b[0]);\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ return;\r
+ }\r
+\r
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src);\r
a = sum(src);\r
b = sum(GpuMat(src));\r
cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "meanshift/cones.png");\r
cv::Mat img_template; \r
\r
- if (cv::gpu::TargetArchs::hasEqualOrGreater(2, 0) && cv::gpu::DeviceInfo().major() >= 2)\r
+ if (cv::gpu::TargetArchs::builtWith(cv::gpu::COMPUTE_20) && cv::gpu::DeviceInfo().major() >= 2)\r
img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result.png");\r
else\r
img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result_CC1X.png");\r
cv::Mat spmap_template;\r
cv::FileStorage fs;\r
\r
- if (cv::gpu::TargetArchs::hasEqualOrGreater(2, 0) && cv::gpu::DeviceInfo().major() >= 2)\r
+ if (cv::gpu::TargetArchs::builtWith(cv::gpu::COMPUTE_20) && cv::gpu::DeviceInfo().major() >= 2)\r
fs.open(std::string(ts->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ);\r
else\r
fs.open(std::string(ts->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ);\r