}
//////////////////////////////////////////////////////////////////////////////
-// mulSpectrums
-
-#ifdef HAVE_CUFFT
-
-namespace cv { namespace cuda { namespace device
-{
- void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
-
- void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
-}}}
-
-#endif
-
-void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream)
-{
-#ifndef HAVE_CUFFT
- (void) _src1;
- (void) _src2;
- (void) _dst;
- (void) flags;
- (void) conjB;
- (void) stream;
- throw_no_cuda();
-#else
- (void) flags;
-
- typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream);
- static Caller callers[] = { device::mulSpectrums, device::mulSpectrums_CONJ };
-
- GpuMat src1 = _src1.getGpuMat();
- GpuMat src2 = _src2.getGpuMat();
-
- CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
- CV_Assert( src1.size() == src2.size() );
-
- _dst.create(src1.size(), CV_32FC2);
- GpuMat dst = _dst.getGpuMat();
-
- Caller caller = callers[(int)conjB];
- caller(src1, src2, dst, StreamAccessor::getStream(stream));
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// mulAndScaleSpectrums
-
-#ifdef HAVE_CUFFT
-
-namespace cv { namespace cuda { namespace device
-{
- void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
-
- void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
-}}}
-
-#endif
-
-void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
-{
-#ifndef HAVE_CUFFT
- (void) _src1;
- (void) _src2;
- (void) _dst;
- (void) flags;
- (void) scale;
- (void) conjB;
- (void) stream;
- throw_no_cuda();
-#else
- (void)flags;
-
- typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, PtrStepSz<cufftComplex>, cudaStream_t stream);
- static Caller callers[] = { device::mulAndScaleSpectrums, device::mulAndScaleSpectrums_CONJ };
-
- GpuMat src1 = _src1.getGpuMat();
- GpuMat src2 = _src2.getGpuMat();
-
- CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
- CV_Assert( src1.size() == src2.size() );
-
- _dst.create(src1.size(), CV_32FC2);
- GpuMat dst = _dst.getGpuMat();
-
- Caller caller = callers[(int)conjB];
- caller(src1, src2, scale, dst, StreamAccessor::getStream(stream));
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////////
// dft
void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream)
//
//M*/
-#if !defined CUDA_DISABLER
+#include "opencv2/opencv_modules.hpp"
-#include "cvconfig.h"
+#ifndef HAVE_OPENCV_CUDEV
-#ifdef HAVE_CUFFT
+#error "opencv_cudev is required"
-#include <cufft.h>
+#else
-#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudev.hpp"
-namespace cv { namespace cuda { namespace device
-{
- //////////////////////////////////////////////////////////////////////////
- // mulSpectrums
+using namespace cv::cudev;
- __global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
- {
- const int x = blockIdx.x * blockDim.x + threadIdx.x;
- const int y = blockIdx.y * blockDim.y + threadIdx.y;
+//////////////////////////////////////////////////////////////////////////////
+// mulSpectrums
- if (x < c.cols && y < c.rows)
- {
- c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
- }
+namespace
+{
+ __device__ __forceinline__ float real(const float2& val)
+ {
+ return val.x;
}
-
- void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
+ __device__ __forceinline__ float imag(const float2& val)
{
- dim3 threads(256);
- dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
-
- mulSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, c);
- cudaSafeCall( cudaGetLastError() );
-
- if (stream == 0)
- cudaSafeCall( cudaDeviceSynchronize() );
+ return val.y;
}
+ __device__ __forceinline__ float2 cmul(const float2& a, const float2& b)
+ {
+ return make_float2((real(a) * real(b)) - (imag(a) * imag(b)),
+ (real(a) * imag(b)) + (imag(a) * real(b)));
+ }
- //////////////////////////////////////////////////////////////////////////
- // mulSpectrums_CONJ
-
- __global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
+ __device__ __forceinline__ float2 conj(const float2& a)
{
- const int x = blockIdx.x * blockDim.x + threadIdx.x;
- const int y = blockIdx.y * blockDim.y + threadIdx.y;
+ return make_float2(real(a), -imag(a));
+ }
- if (x < c.cols && y < c.rows)
+ struct comlex_mul : binary_function<float2, float2, float2>
+ {
+ __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
{
- c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
+ return cmul(a, b);
}
- }
+ };
-
- void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
+ struct comlex_mul_conj : binary_function<float2, float2, float2>
{
- dim3 threads(256);
- dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
-
- mulSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, c);
- cudaSafeCall( cudaGetLastError() );
-
- if (stream == 0)
- cudaSafeCall( cudaDeviceSynchronize() );
- }
+ __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
+ {
+ return cmul(a, conj(b));
+ }
+ };
+ struct comlex_mul_scale : binary_function<float2, float2, float2>
+ {
+ float scale;
- //////////////////////////////////////////////////////////////////////////
- // mulAndScaleSpectrums
+ __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
+ {
+ return scale * cmul(a, b);
+ }
+ };
- __global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
+ struct comlex_mul_conj_scale : binary_function<float2, float2, float2>
{
- const int x = blockIdx.x * blockDim.x + threadIdx.x;
- const int y = blockIdx.y * blockDim.y + threadIdx.y;
+ float scale;
- if (x < c.cols && y < c.rows)
+ __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
{
- cufftComplex v = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
- c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
+ return scale * cmul(a, conj(b));
}
- }
+ };
+}
+void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream)
+{
+ (void) flags;
- void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
- {
- dim3 threads(256);
- dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
+ GpuMat src1 = _src1.getGpuMat();
+ GpuMat src2 = _src2.getGpuMat();
- mulAndScaleSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, scale, c);
- cudaSafeCall( cudaGetLastError() );
+ CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
+ CV_Assert( src1.size() == src2.size() );
- if (stream)
- cudaSafeCall( cudaDeviceSynchronize() );
- }
+ _dst.create(src1.size(), CV_32FC2);
+ GpuMat dst = _dst.getGpuMat();
+ if (conjB)
+ gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
+ else
+ gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
+}
- //////////////////////////////////////////////////////////////////////////
- // mulAndScaleSpectrums_CONJ
+void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
+{
+ (void) flags;
- __global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
- {
- const int x = blockIdx.x * blockDim.x + threadIdx.x;
- const int y = blockIdx.y * blockDim.y + threadIdx.y;
+ GpuMat src1 = _src1.getGpuMat();
+ GpuMat src2 = _src2.getGpuMat();
- if (x < c.cols && y < c.rows)
- {
- cufftComplex v = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
- c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
- }
- }
+ CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
+ CV_Assert( src1.size() == src2.size() );
+ _dst.create(src1.size(), CV_32FC2);
+ GpuMat dst = _dst.getGpuMat();
- void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
+ if (conjB)
{
- dim3 threads(256);
- dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
-
- mulAndScaleSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, scale, c);
- cudaSafeCall( cudaGetLastError() );
-
- if (stream == 0)
- cudaSafeCall( cudaDeviceSynchronize() );
+ comlex_mul_conj_scale op;
+ op.scale = scale;
+ gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
}
-}}} // namespace cv { namespace cuda { namespace cudev
-
-#endif // HAVE_CUFFT
+ else
+ {
+ comlex_mul_scale op;
+ op.scale = scale;
+ gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
+ }
+}
-#endif /* CUDA_DISABLER */
+#endif