#include <stddef.h>
#include <stdio.h>
-#include <iostream>
+//#include <iostream>
#include "cuda_shared.hpp"
#include "cuda_runtime.h"
namespace mat_operators
{
//////////////////////////////////////////////////////////
+ // CopyTo
+ //////////////////////////////////////////////////////////
+
+ template<typename T>
+ __global__ void kernel_copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
+ {
+ size_t x = blockIdx.x * blockDim.x + threadIdx.x;
+ size_t y = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if ((x < cols * channels ) && (y < rows))
+ if (mask[y * step_mask + x / channels] != 0)
+ {
+ size_t idx = y * (step_mat / sizeof(T)) + x;
+ mat_dst[idx] = mat_src[idx];
+ }
+ }
+
+ //////////////////////////////////////////////////////////
// SetTo
//////////////////////////////////////////////////////////
{
//////////////////////////////////////////////////////////////
+ // CopyTo
+ //////////////////////////////////////////////////////////////
+
+ typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels);
+
+ template<typename T>
+ void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels)
+ {
+ dim3 threadsPerBlock(16,16, 1);
+ dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
+ ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
+ ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
+ cudaSafeCall ( cudaThreadSynchronize() );
+ }
+
+ extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels)
+ {
+ static CopyToFunc tab[8] =
+ {
+ copy_to_with_mask_run<unsigned char>,
+ copy_to_with_mask_run<char>,
+ copy_to_with_mask_run<unsigned short>,
+ copy_to_with_mask_run<short>,
+ copy_to_with_mask_run<int>,
+ copy_to_with_mask_run<float>,
+ copy_to_with_mask_run<double>,
+ 0
+ };
+
+ CopyToFunc func = tab[depth];
+
+ if (func == 0) error("Operation \'ConvertTo\' doesn't supported on your GPU model", __FILE__, __LINE__);
+
+ func(mat_src, mat_dst, mask, channels);
+ }
+
+
+ //////////////////////////////////////////////////////////////
// SetTo
//////////////////////////////////////////////////////////////
// ConvertTo
//////////////////////////////////////////////////////////////
+ typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta);
-
- typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta);
-
- //#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 130)
+ //#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 130)
template<typename T, typename DT>
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta)
--- /dev/null
+#include "gputest.hpp"
+#include "highgui.h"
+#include "cv.h"
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include <iomanip> // for cout << setw()
+
+using namespace cv;
+using namespace std;
+using namespace gpu;
+
+class CV_GpuMatOpCopyTo : public CvTest
+{
+ public:
+ CV_GpuMatOpCopyTo();
+ ~CV_GpuMatOpCopyTo();
+ protected:
+
+ template <typename T>
+ void print_mat(const T & mat, const std::string & name) const;
+
+ void run(int);
+
+ bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
+
+ private:
+ int rows;
+ int cols;
+};
+
+CV_GpuMatOpCopyTo::CV_GpuMatOpCopyTo(): CvTest( "GpuMatOperatorCopyTo", "copyTo" )
+{
+ rows = 234;
+ cols = 123;
+
+ //#define PRINT_MATRIX
+}
+
+CV_GpuMatOpCopyTo::~CV_GpuMatOpCopyTo() {}
+
+template<typename T>
+void CV_GpuMatOpCopyTo::print_mat(const T & mat, const std::string & name) const
+{
+ cv::imshow(name, mat);
+}
+
+bool CV_GpuMatOpCopyTo::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
+{
+ Mat cmat(cpumat.size(), cpumat.type(), Scalar::all(0));
+ GpuMat gmat(cmat);
+
+ Mat cpumask(cpumat.size(), CV_8U);
+ randu(cpumask, Scalar::all(0), Scalar::all(127));
+ threshold(cpumask, cpumask, 0, 127, THRESH_BINARY);
+ GpuMat gpumask(cpumask);
+
+ //int64 time = getTickCount();
+ cpumat.copyTo(cmat, cpumask);
+ //int64 time1 = getTickCount();
+ gpumat.copyTo(gmat, gpumask);
+ //int64 time2 = getTickCount();
+
+ //std::cout << "\ntime cpu: " << std::fixed << std::setprecision(12) << 1.0 / double((time1 - time) / (double)getTickFrequency());
+ //std::cout << "\ntime gpu: " << std::fixed << std::setprecision(12) << 1.0 / double((time2 - time1) / (double)getTickFrequency());
+ //std::cout << "\n";
+
+#ifdef PRINT_MATRIX
+ print_mat(cmat, "cpu mat");
+ print_mat(gmat, "gpu mat");
+ print_mat(cpumask, "cpu mask");
+ print_mat(gpumask, "gpu mask");
+ cv::waitKey(0);
+#endif
+
+ double ret = norm(cmat, gmat);
+
+ if (ret < 1.0)
+ return true;
+ else
+ {
+ std::cout << "return : " << ret << "\n";
+ return false;
+ }
+}
+
+void CV_GpuMatOpCopyTo::run( int /* start_from */)
+{
+ bool is_test_good = true;
+
+ for (int i = 0 ; i < 7; i++)
+ {
+ Mat cpumat(rows, cols, i);
+ cpumat.setTo(Scalar::all(127));
+
+ GpuMat gpumat(cpumat);
+
+ is_test_good &= compare_matrix(cpumat, gpumat);
+ }
+
+ if (is_test_good == true)
+ ts->set_failed_test_info(CvTS::OK);
+ else
+ ts->set_failed_test_info(CvTS::FAIL_GENERIC);
+}
+
+CV_GpuMatOpCopyTo CV_GpuMatOpCopyTo_test;