ncvAssertCUDAReturn(cudaMemsetAsync(dv.ptr(), 0, kLevelSizeInBytes, stream), NCV_CUDA_ERROR);\r
\r
//texture format descriptor\r
cudaChannelFormatDesc ch_desc = cudaCreateChannelDesc<float>();
\r
I0 = *img0Iter;\r
I1 = *img1Iter;\r
++img0Iter;\r
++img1Iter;\r
\r
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_I0, I0->ptr(), ch_desc, kLevelWidth, kLevelHeight, kLevelStride*sizeof(float)), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_I1, I1->ptr(), ch_desc, kLevelWidth, kLevelHeight, kLevelStride*sizeof(float)), NCV_CUDA_ERROR);
\r
//compute derivatives\r
dim3 dBlocks(iDivUp(kLevelWidth, 32), iDivUp(kLevelHeight, 6));\r
ncvAssertReturnNcvStat( nppiStFilterRowBorder_32f_C1R (Iy.ptr(), srcSize, nSrcStep, Ixy.ptr(), srcSize, nSrcStep, oROI,\r
nppStBorderMirror, derivativeFilter.ptr(), kDFilterSize, kDFilterSize/2, 1.0f/12.0f) ); \r
\r
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ix, Ix.ptr(), ch_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ixx, Ixx.ptr(), ch_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ix0, Ix0.ptr(), ch_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Iy, Iy.ptr(), ch_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Iyy, Iyy.ptr(), ch_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Iy0, Iy0.ptr(), ch_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture2D(0, tex_Ixy, Ixy.ptr(), ch_desc, kLevelWidth, kLevelHeight, kPitchTex), NCV_CUDA_ERROR);
\r
// flow\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_u, ptrU->ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_v, ptrV->ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
// flow increments\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
dim3 psor_blocks(iDivUp(kLevelWidth, PSOR_TILE_WIDTH), iDivUp(kLevelHeight, PSOR_TILE_HEIGHT));\r
dim3 psor_threads(PSOR_TILE_WIDTH, PSOR_TILE_HEIGHT);\r
\r
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);\r
\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_y, diffusivity_y.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_dudv, num_dudv.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_u, num_u.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_v, num_v.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
prepare_sor_stage_2<<<psor_blocks, psor_threads, 0, stream>>>(denom_u.ptr(), denom_v.ptr(), kLevelWidth, kLevelHeight, kLevelStride);\r
\r
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);\r
\r
// linear system coefficients\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_x, diffusivity_x.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_diffusivity_y, diffusivity_y.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_dudv, num_dudv.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_u, num_u.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_numerator_v, num_v.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_inv_denominator_u, denom_u.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_inv_denominator_v, denom_v.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
//solve linear system\r
for (Ncv32u solver_iteration = 0; solver_iteration < desc.number_of_solver_iterations; ++solver_iteration)\r
{\r
float omega = 1.99f;\r
\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
sor_pass<0><<<sor_blocks, sor_threads, 0, stream>>>\r
(du_new.ptr(), \r
\r
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);\r
\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du_new.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv_new.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
\r
sor_pass<1><<<sor_blocks, sor_threads, 0, stream>>>\r
(du.ptr(), \r
\r
ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);\r
\r
ncvAssertCUDAReturn(cudaBindTexture(0, tex_du, du.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaBindTexture(0, tex_dv, dv.ptr(), ch_desc, kLevelSizeInBytes), NCV_CUDA_ERROR);
}//end of solver loop\r
}// end of inner loop\r
\r
{\r
NCVVectorAlloc();\r
NCVVectorAlloc(const NCVVectorAlloc &);\r
NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&);
\r
public:\r
\r
NCVVectorAlloc(INCVMemAllocator &allocator_, Ncv32u length_)
:\r
allocator(allocator_)\r
{\r
this->clear();\r
this->allocatedMem.clear();\r
\r
ncvStat = allocator.alloc(this->allocatedMem, length_ * sizeof(T));
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", );\r
\r
this->_ptr = (T *)this->allocatedMem.begin.ptr;\r
this->_length = length_;
this->_memtype = this->allocatedMem.begin.memtype;\r
}\r
\r
this->bReused = true;\r
}\r
\r
NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length_)
{\r
this->bReused = false;\r
this->clear();\r
\r
ncvAssertPrintReturn(length_ * sizeof(T) <= memSegment.size, \
"NCVVectorReuse ctor:: memory binding failed due to size mismatch", );\r
\r
this->_length = length_;
this->_ptr = (T *)memSegment.begin.ptr;\r
this->_memtype = memSegment.begin.memtype;\r
\r
NCVMatrixAlloc& operator=(const NCVMatrixAlloc &);\r
public:\r
\r
NCVMatrixAlloc(INCVMemAllocator &allocator_, Ncv32u width_, Ncv32u height_, Ncv32u pitch_=0)
:\r
allocator(allocator_)
{\r
NCVStatus ncvStat;\r
\r
this->clear();\r
this->allocatedMem.clear();\r
\r
Ncv32u widthBytes = width_ * sizeof(T);
Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment());\r
\r
if (pitch_ != 0)
{\r
ncvAssertPrintReturn(pitch_ >= pitchBytes &&
(pitch_ & (allocator.alignment() - 1)) == 0,
"NCVMatrixAlloc ctor:: incorrect pitch passed", );\r
pitchBytes = pitch_;
}\r
\r
Ncv32u requiredAllocSize = pitchBytes * height_;
\r
ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize);\r
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", );\r
\r
this->_ptr = (T *)this->allocatedMem.begin.ptr;\r
this->_width = width_;
this->_height = height_;
this->_pitch = pitchBytes;\r
this->_memtype = this->allocatedMem.begin.memtype;\r
}\r
\r
public:\r
\r
NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width_, Ncv32u height_, Ncv32u pitch_=0, NcvBool bSkipPitchCheck=false)
{\r
this->bReused = false;\r
this->clear();\r
\r
Ncv32u widthBytes = width_ * sizeof(T);
Ncv32u pitchBytes = alignUp(widthBytes, alignment);\r
\r
if (pitch_ != 0)
{\r
if (!bSkipPitchCheck)\r
{\r
ncvAssertPrintReturn(pitch_ >= pitchBytes &&
(pitch_ & (alignment - 1)) == 0,
"NCVMatrixReuse ctor:: incorrect pitch passed", );\r
}\r
else\r
{\r
ncvAssertPrintReturn(pitch_ >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", );
}\r
pitchBytes = pitch_;
}\r
\r
ncvAssertPrintReturn(pitchBytes * height_ <= memSegment.size, \
"NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );\r
\r
this->_width = width_;
this->_height = height_;
this->_pitch = pitchBytes;\r
this->_ptr = (T *)memSegment.begin.ptr;\r
this->_memtype = memSegment.begin.memtype;\r