namespace canny
{
- texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
struct SrcTex
{
+ virtual ~SrcTex() {}
+
+ __host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}
+
+ __device__ __forceinline__ virtual int operator ()(int y, int x) const = 0;
+
int xoff;
int yoff;
- __host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}
+ };
+
+ texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
+ struct SrcTexRef : SrcTex
+ {
+ __host__ SrcTexRef(int _xoff, int _yoff) : SrcTex(_xoff, _yoff) {}
- __device__ __forceinline__ int operator ()(int y, int x) const
+ __device__ __forceinline__ int operator ()(int y, int x) const override
{
return tex2D(tex_src, x + xoff, y + yoff);
}
};
- struct SrcTexObject
+ struct SrcTexObj : SrcTex
{
- int xoff;
- int yoff;
- cudaTextureObject_t tex_src_object;
- __host__ SrcTexObject(int _xoff, int _yoff, cudaTextureObject_t _tex_src_object) : xoff(_xoff), yoff(_yoff), tex_src_object(_tex_src_object) { }
+ __host__ SrcTexObj(int _xoff, int _yoff, cudaTextureObject_t _tex_src_object) : SrcTex(_xoff, _yoff), tex_src_object(_tex_src_object) { }
- __device__ __forceinline__ int operator ()(int y, int x) const
+ __device__ __forceinline__ int operator ()(int y, int x) const override
{
return tex2D<uchar>(tex_src_object, x + xoff, y + yoff);
}
+ cudaTextureObject_t tex_src_object;
};
- template <class Norm> __global__
- void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
- {
- const int x = blockIdx.x * blockDim.x + threadIdx.x;
- const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
- if (y >= mag.rows || x >= mag.cols)
- return;
-
- int dxVal = (src(y - 1, x + 1) + 2 * src(y, x + 1) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y, x - 1) + src(y + 1, x - 1));
- int dyVal = (src(y + 1, x - 1) + 2 * src(y + 1, x) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y - 1, x) + src(y - 1, x + 1));
-
- dx(y, x) = dxVal;
- dy(y, x) = dyVal;
-
- mag(y, x) = norm(dxVal, dyVal);
- }
-
- template <class Norm> __global__
- void calcMagnitudeKernel(const SrcTexObject src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
+ template <
+ class T,
+ class Norm,
+ typename = std::enable_if_t<std::is_base_of<SrcTex, T>::value>
+ >
+ __global__ void calcMagnitudeKernel(const T src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (cc30)
{
- cudaResourceDesc resDesc;
- memset(&resDesc, 0, sizeof(resDesc));
- resDesc.resType = cudaResourceTypePitch2D;
- resDesc.res.pitch2D.devPtr = srcWhole.ptr();
- resDesc.res.pitch2D.height = srcWhole.rows;
- resDesc.res.pitch2D.width = srcWhole.cols;
- resDesc.res.pitch2D.pitchInBytes = srcWhole.step;
- resDesc.res.pitch2D.desc = cudaCreateChannelDesc<uchar>();
-
cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.addressMode[0] = cudaAddressModeClamp;
texDesc.addressMode[2] = cudaAddressModeClamp;
cudaTextureObject_t tex = 0;
- cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
+ createTextureObjectPitch2D(&tex, srcWhole, texDesc);
- SrcTexObject src(xoff, yoff, tex);
+ SrcTexObj src(xoff, yoff, tex);
if (L2Grad)
{
else
{
bindTexture(&tex_src, srcWhole);
- SrcTex src(xoff, yoff);
+ SrcTexRef src(xoff, yoff);
if (L2Grad)
{
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/core/cuda/limits.hpp"
+#include "opencv2/core/cuda.hpp"
using namespace cv::cuda;
using namespace cv::cuda::device;
}
}
+ struct SrcTex
+ {
+ virtual ~SrcTex() {}
+
+ __device__ __forceinline__ virtual float I1(float x, float y) const = 0;
+ __device__ __forceinline__ virtual float I1x(float x, float y) const = 0;
+ __device__ __forceinline__ virtual float I1y(float x, float y) const = 0;
+ };
+
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp);
+ struct SrcTexRef : SrcTex
+ {
+ __device__ __forceinline__ float I1(float x, float y) const override
+ {
+ return tex2D(tex_I1, x, y);
+ }
+ __device__ __forceinline__ float I1x(float x, float y) const override
+ {
+ return tex2D(tex_I1x, x, y);
+ }
+ __device__ __forceinline__ float I1y(float x, float y) const override
+ {
+ return tex2D(tex_I1y, x, y);
+ }
+ };
+
+ struct SrcTexObj : SrcTex
+ {
+ __host__ SrcTexObj(cudaTextureObject_t tex_obj_I1_, cudaTextureObject_t tex_obj_I1x_, cudaTextureObject_t tex_obj_I1y_)
+ : tex_obj_I1(tex_obj_I1_), tex_obj_I1x(tex_obj_I1x_), tex_obj_I1y(tex_obj_I1y_) {}
+
+ __device__ __forceinline__ float I1(float x, float y) const override
+ {
+ return tex2D<float>(tex_obj_I1, x, y);
+ }
+ __device__ __forceinline__ float I1x(float x, float y) const override
+ {
+ return tex2D<float>(tex_obj_I1x, x, y);
+ }
+ __device__ __forceinline__ float I1y(float x, float y) const override
+ {
+ return tex2D<float>(tex_obj_I1y, x, y);
+ }
- __global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
+ cudaTextureObject_t tex_obj_I1;
+ cudaTextureObject_t tex_obj_I1x;
+ cudaTextureObject_t tex_obj_I1y;
+ };
+
+ template <
+ typename T,
+ typename = std::enable_if_t<std::is_base_of<SrcTex, T>::value>
+ >
+ __global__ void warpBackwardKernel(
+ const PtrStepSzf I0, const T src, const PtrStepf u1, const PtrStepf u2,
+ PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
{
const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
- sum += w * tex2D(tex_I1 , cx, cy);
- sumx += w * tex2D(tex_I1x, cx, cy);
- sumy += w * tex2D(tex_I1y, cx, cy);
+ sum += w * src.I1(cx, cy);
+ sumx += w * src.I1x(cx, cy);
+ sumy += w * src.I1y(cx, cy);
wsum += w;
}
const dim3 block(32, 8);
const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));
- bindTexture(&tex_I1 , I1);
- bindTexture(&tex_I1x, I1x);
- bindTexture(&tex_I1y, I1y);
+ bool cc30 = deviceSupports(FEATURE_SET_COMPUTE_30);
- warpBackwardKernel<<<grid, block, 0, stream>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
- cudaSafeCall( cudaGetLastError() );
+ if (cc30)
+ {
+ cudaTextureDesc texDesc;
+ memset(&texDesc, 0, sizeof(texDesc));
+ texDesc.addressMode[0] = cudaAddressModeClamp;
+ texDesc.addressMode[1] = cudaAddressModeClamp;
+ texDesc.addressMode[2] = cudaAddressModeClamp;
- if (!stream)
- cudaSafeCall( cudaDeviceSynchronize() );
+ cudaTextureObject_t texObj_I1 = 0, texObj_I1x = 0, texObj_I1y = 0;
+
+ createTextureObjectPitch2D(&texObj_I1, I1, texDesc);
+ createTextureObjectPitch2D(&texObj_I1x, I1x, texDesc);
+ createTextureObjectPitch2D(&texObj_I1y, I1y, texDesc);
+
+ warpBackwardKernel << <grid, block, 0, stream >> > (I0, SrcTexObj(texObj_I1, texObj_I1x, texObj_I1y), u1, u2, I1w, I1wx, I1wy, grad, rho);
+ cudaSafeCall(cudaGetLastError());
+
+ if (!stream)
+ cudaSafeCall(cudaDeviceSynchronize());
+ else
+ cudaSafeCall(cudaStreamSynchronize(stream));
+
+ cudaSafeCall(cudaDestroyTextureObject(texObj_I1));
+ cudaSafeCall(cudaDestroyTextureObject(texObj_I1x));
+ cudaSafeCall(cudaDestroyTextureObject(texObj_I1y));
+ }
+ else
+ {
+ bindTexture(&tex_I1, I1);
+ bindTexture(&tex_I1x, I1x);
+ bindTexture(&tex_I1y, I1y);
+
+ warpBackwardKernel << <grid, block, 0, stream >> > (I0, SrcTexRef(), u1, u2, I1w, I1wx, I1wy, grad, rho);
+ cudaSafeCall(cudaGetLastError());
+
+ if (!stream)
+ cudaSafeCall(cudaDeviceSynchronize());
+ }
}
}