/////////////////////////////////// Remap ///////////////////////////////////////////////\r
namespace cv { namespace gpu { namespace imgproc\r
{\r
- // cudaAddressModeClamp == BrdReplicate\r
- /*texture<uchar, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_remap_uchar_LinearFilter(0, cudaFilterModeLinear, cudaAddressModeClamp);\r
-\r
- __global__ void remap_uchar_LinearFilter(const PtrStepf mapx, const PtrStepf mapy, DevMem2D dst)\r
- { \r
- const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
- const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
- if (x < dst.cols && y < dst.rows)\r
- {\r
- const float xcoo = mapx.ptr(y)[x];\r
- const float ycoo = mapy.ptr(y)[x];\r
-\r
- dst.ptr(y)[x] = 255.0f * tex2D(tex_remap_uchar_LinearFilter, xcoo, ycoo); \r
- }\r
- }*/\r
-\r
template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)\r
{\r
const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));\r
}\r
}\r
-\r
- template <template <typename> class Filter, template <typename> class B, typename T> \r
- void remap_caller(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, T borderValue)\r
+ \r
+ template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream\r
{\r
- dim3 block(32, 8);\r
- dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
+ static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue)\r
+ {\r
+ typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type; \r
+ \r
+ dim3 block(32, 8);\r
+ dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
\r
- B<T> brd(src.rows, src.cols, borderValue);\r
- BorderReader< PtrStep_<T>, B<T> > brd_src(src, brd);\r
- Filter< BorderReader< PtrStep_<T>, B<T> > > filter_src(brd_src);\r
+ B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));\r
+ BorderReader< PtrStep_<T>, B<work_type> > brd_src(src, brd);\r
+ Filter< BorderReader< PtrStep_<T>, B<work_type> > > filter_src(brd_src);\r
\r
- remap<<<grid, block>>>(filter_src, mapx, mapy, dst);\r
- cudaSafeCall( cudaGetLastError() );\r
+ remap<<<grid, block>>>(filter_src, mapx, mapy, dst);\r
+ cudaSafeCall( cudaGetLastError() );\r
\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+ };\r
\r
-#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type, filter) \\r
- template <> void remap_caller<filter, BrdReplicate>(const DevMem2D_<type>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<type>& dst, type) \\r
+#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type) \\r
+ texture< type , cudaTextureType2D> tex_remap_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \\r
+ struct tex_remap_ ## type ## _reader \\r
{ \\r
- const dim3 block(16, 16); \\r
- const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \\r
- TextureBinder tex(&tex_remap_ ## type ## _ ## filter ## , src); \\r
- remap_ ## type ## _ ## filter ## <<<grid, block>>>(mapx, mapy, dst); \\r
- cudaSafeCall( cudaGetLastError() ); \\r
- cudaSafeCall( cudaDeviceSynchronize() ); \\r
- }\r
+ typedef type elem_type; \\r
+ typedef int index_type; \\r
+ __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \\r
+ { \\r
+ return tex2D(tex_remap_ ## type , x, y); \\r
+ } \\r
+ }; \\r
+ template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \\r
+ { \\r
+ static void call(const DevMem2D_< type >& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_< type >& dst, const float*) \\r
+ { \\r
+ dim3 block(32, 8); \\r
+ dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \\r
+ TextureBinder texHandler(&tex_remap_ ## type , src); \\r
+ tex_remap_ ## type ##_reader texSrc; \\r
+ Filter<tex_remap_ ## type ##_reader> filter_src(texSrc); \\r
+ remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \\r
+ cudaSafeCall( cudaGetLastError() ); \\r
+ cudaSafeCall( cudaDeviceSynchronize() ); \\r
+ } \\r
+ };\r
+ \r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)\r
+ \r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(schar)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(char2)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(char4)\r
+ \r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort2)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort4)\r
+ \r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(short)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(short2)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(short4)\r
+ \r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(int)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(int2)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(int4)\r
+ \r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(float)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(float2)\r
+ OPENCV_GPU_IMPLEMENT_REMAP_TEX(float4)\r
+ \r
+#undef OPENCV_GPU_IMPLEMENT_REMAP_TEX\r
+\r
+ template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher\r
+ { \r
+ static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)\r
+ {\r
+ if (stream == 0)\r
+ RemapDispatcherNonStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue);\r
+ else\r
+ callStream(src, mapx, mapy, dst, borderValue, stream);\r
+ }\r
+ \r
+ static void callStream(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)\r
+ {\r
+ typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type; \r
+ \r
+ dim3 block(32, 8);\r
+ dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
\r
- //OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar, LinearFilter)\r
+ B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));\r
+ BorderReader< PtrStep_<T>, B<work_type> > brd_src(src, brd);\r
+ Filter< BorderReader< PtrStep_<T>, B<work_type> > > filter_src(brd_src);\r
\r
-#undef OPENCV_GPU_IMPLEMENT_REMAP_TEX\r
+ remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);\r
+ cudaSafeCall( cudaGetLastError() );\r
+ }\r
+ };\r
\r
- template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, \r
- int interpolation, int borderMode, const double borderValue[4])\r
+ template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream)\r
{\r
- typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst, T borderValue);\r
+ typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream);\r
\r
static const caller_t callers[2][5] = \r
{\r
- { remap_caller<PointFilter, BrdReflect101>, remap_caller<PointFilter, BrdReplicate>, remap_caller<PointFilter, BrdConstant>, remap_caller<PointFilter, BrdReflect>, remap_caller<PointFilter, BrdWrap> },\r
- { remap_caller<LinearFilter, BrdReflect101>, remap_caller<LinearFilter, BrdReplicate>, remap_caller<LinearFilter, BrdConstant>, remap_caller<LinearFilter, BrdReflect>, remap_caller<LinearFilter, BrdWrap> }\r
+ { \r
+ RemapDispatcher<PointFilter, BrdReflect101, T>::call, \r
+ RemapDispatcher<PointFilter, BrdReplicate, T>::call, \r
+ RemapDispatcher<PointFilter, BrdConstant, T>::call, \r
+ RemapDispatcher<PointFilter, BrdReflect, T>::call, \r
+ RemapDispatcher<PointFilter, BrdWrap, T>::call \r
+ },\r
+ { \r
+ RemapDispatcher<LinearFilter, BrdReflect101, T>::call, \r
+ RemapDispatcher<LinearFilter, BrdReplicate, T>::call, \r
+ RemapDispatcher<LinearFilter, BrdConstant, T>::call, \r
+ RemapDispatcher<LinearFilter, BrdReflect, T>::call, \r
+ RemapDispatcher<LinearFilter, BrdWrap, T>::call \r
+ }\r
};\r
\r
- typename VecTraits<T>::elem_type brd[] = {(typename VecTraits<T>::elem_type)borderValue[0], (typename VecTraits<T>::elem_type)borderValue[1], (typename VecTraits<T>::elem_type)borderValue[2], (typename VecTraits<T>::elem_type)borderValue[3]};\r
-\r
- callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), xmap, ymap, static_cast< DevMem2D_<T> >(dst), VecTraits<T>::make(brd));\r
+ callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), xmap, ymap, static_cast< DevMem2D_<T> >(dst), borderValue, stream);\r
}\r
\r
- template void remap_gpu<uchar >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<uchar2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<uchar3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<uchar4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<uchar >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<uchar2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<uchar3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<uchar4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
\r
- template void remap_gpu<schar>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<char2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<char3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<char4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<schar>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<char2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<char3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<char4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
\r
- template void remap_gpu<ushort >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<ushort2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<ushort3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<ushort4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<ushort >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<ushort2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<ushort3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<ushort4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
\r
- template void remap_gpu<short >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<short2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<short3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<short4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<short >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<short2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<short3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<short4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
\r
- template void remap_gpu<uint >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<uint2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<uint3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<uint4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<uint >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<uint2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<uint3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<uint4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
\r
- template void remap_gpu<int >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<int2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<int3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<int4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<int >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<int2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<int3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<int4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
\r
- template void remap_gpu<float >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<float2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<float3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
- template void remap_gpu<float4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<float >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<float2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<float3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
+ template void remap_gpu<float4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
\r
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////\r
\r
\r
namespace build_warp_maps\r
{\r
+\r
__constant__ float cr[9];\r
__constant__ float crinv[9];\r
__constant__ float cf, cs;\r