From d547d9c98678270c6ee3d777a88890f4bb982381 Mon Sep 17 00:00:00 2001 From: Marina Kolpakova Date: Thu, 12 Apr 2012 10:09:51 +0000 Subject: [PATCH] fixed alignment related bugs in gpu/nvidia --- modules/gpu/src/nvidia/NCVBroxOpticalFlow.hpp | 40 ++++++------ modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp | 77 ++++++++++++++--------- modules/gpu/src/nvidia/core/NCV.hpp | 17 +++-- modules/gpu/test/nvidia/TestRectStdDev.h | 2 +- 4 files changed, 82 insertions(+), 54 deletions(-) diff --git a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.hpp b/modules/gpu/src/nvidia/NCVBroxOpticalFlow.hpp index 0c8ad59..f2a6287 100644 --- a/modules/gpu/src/nvidia/NCVBroxOpticalFlow.hpp +++ b/modules/gpu/src/nvidia/NCVBroxOpticalFlow.hpp @@ -1,7 +1,7 @@ /*M/////////////////////////////////////////////////////////////////////////////////////// // -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. @@ -64,18 +64,18 @@ /// \brief Model and solver parameters struct NCVBroxOpticalFlowDescriptor { - /// flow smoothness - Ncv32f alpha; - /// gradient constancy importance - Ncv32f gamma; - /// pyramid scale factor - Ncv32f scale_factor; - /// number of lagged non-linearity iterations (inner loop) - Ncv32u number_of_inner_iterations; - /// number of warping iterations (number of pyramid levels) - Ncv32u number_of_outer_iterations; - /// number of linear system solver iterations - Ncv32u number_of_solver_iterations; + /// flow smoothness + Ncv32f alpha; + /// gradient constancy importance + Ncv32f gamma; + /// pyramid scale factor + Ncv32f scale_factor; + /// number of lagged non-linearity iterations (inner loop) + Ncv32u number_of_inner_iterations; + /// number of warping iterations (number of pyramid levels) + Ncv32u number_of_outer_iterations; + /// number of linear system solver iterations + Ncv32u number_of_solver_iterations; }; ///////////////////////////////////////////////////////////////////////////////////////// @@ -93,11 +93,11 @@ struct NCVBroxOpticalFlowDescriptor NCV_EXPORTS NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, - INCVMemAllocator &gpu_mem_allocator, - const NCVMatrix &frame0, - const NCVMatrix &frame1, - NCVMatrix &u, - NCVMatrix &v, - cudaStream_t stream); + INCVMemAllocator &gpu_mem_allocator, + const NCVMatrix &frame0, + const NCVMatrix &frame1, + NCVMatrix &u, + NCVMatrix &v, + cudaStream_t stream); #endif diff --git a/modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp b/modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp index 88fdddd..eddc390 100644 --- a/modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp +++ b/modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp @@ -59,6 +59,7 @@ #define _ncvhaarobjectdetection_hpp_ #include +#include #include "NCV.hpp" @@ -68,41 +69,43 @@ // //============================================================================== - struct HaarFeature64 { - uint2 _ui2; + union + { + uint2 _ui2; + struct {NcvRect8u__ _rect; Ncv32f _f;}; + }; + #define HaarFeature64_CreateCheck_MaxRectField 0xFF __host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/) { ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES); - ((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX; - ((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY; - ((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth; - ((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight; + _rect = NcvRect8u(rectX,rectY,rectWidth,rectHeight); + return NCV_SUCCESS; } __host__ NCVStatus setWeight(Ncv32f weight) { - ((Ncv32f*)&(this->_ui2.y))[0] = weight; + _f = weight; + return NCV_SUCCESS; } __device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight) { - NcvRect8u tmpRect = *(NcvRect8u*)(&this->_ui2.x); - *rectX = tmpRect.x; - *rectY = tmpRect.y; - *rectWidth = tmpRect.width; - *rectHeight = tmpRect.height; + *rectX = _rect.x; + *rectY = _rect.y; + *rectWidth = _rect.width; + *rectHeight = _rect.height; } __device__ __host__ Ncv32f getWeight(void) { - return *(Ncv32f*)(&this->_ui2.y); + return _f; } }; @@ -171,23 +174,27 @@ public: struct HaarClassifierNodeDescriptor32 { +union +{ uint1 _ui1; + Ncv32f _f; +}; __host__ NCVStatus create(Ncv32f leafValue) { - *(Ncv32f *)&this->_ui1 = leafValue; + _f = leafValue; return NCV_SUCCESS; } __host__ NCVStatus create(Ncv32u offsetHaarClassifierNode) { - this->_ui1.x = offsetHaarClassifierNode; + _ui1.x = offsetHaarClassifierNode; return NCV_SUCCESS; } __host__ Ncv32f getLeafValueHost(void) { - return *(Ncv32f *)&this->_ui1.x; + return _f; } #ifdef __CUDACC__ @@ -199,57 +206,67 @@ struct HaarClassifierNodeDescriptor32 __device__ __host__ Ncv32u getNextNodeOffset(void) { - return this->_ui1.x; + return _ui1.x; } }; struct HaarClassifierNode128 { +union +{ uint4 _ui4; + struct + { + HaarFeatureDescriptor32 _f; + Ncv32f _t; + HaarClassifierNodeDescriptor32 _nl; + HaarClassifierNodeDescriptor32 _nr; + }; +}; __host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f) { - this->_ui4.x = *(Ncv32u *)&f; + _f = f; return NCV_SUCCESS; } __host__ NCVStatus setThreshold(Ncv32f t) { - this->_ui4.y = *(Ncv32u *)&t; + _t = t; return NCV_SUCCESS; } __host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl) { - this->_ui4.z = *(Ncv32u *)&nl; + _nl = nl; return NCV_SUCCESS; } __host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr) { - this->_ui4.w = *(Ncv32u *)&nr; + _nr = nr; return NCV_SUCCESS; } __host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void) { - return *(HaarFeatureDescriptor32 *)&this->_ui4.x; + return _f; } __host__ __device__ Ncv32f getThreshold(void) { - return *(Ncv32f*)&this->_ui4.y; + return _t; } __host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void) { - return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.z; + return _nl; } __host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void) { - return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.w; + return _nr; } }; @@ -260,11 +277,15 @@ struct HaarStage64 #define HaarStage64_Interpret_MaskRootNodeOffset 0xFFFF0000 #define HaarStage64_Interpret_ShiftRootNodeOffset 16 +union +{ uint2 _ui2; + struct {Ncv32f _t; Ncv32u _root;}; +}; __host__ NCVStatus setStageThreshold(Ncv32f t) { - this->_ui2.x = *(Ncv32u *)&t; + _t = t; return NCV_SUCCESS; } @@ -290,7 +311,7 @@ struct HaarStage64 __host__ __device__ Ncv32f getStageThreshold(void) { - return *(Ncv32f*)&this->_ui2.x; + return _t; } __host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void) @@ -304,14 +325,12 @@ struct HaarStage64 } }; - NCV_CT_ASSERT(sizeof(HaarFeature64) == 8); NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4); NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4); NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16); NCV_CT_ASSERT(sizeof(HaarStage64) == 8); - //============================================================================== // // Classifier cascade descriptor diff --git a/modules/gpu/src/nvidia/core/NCV.hpp b/modules/gpu/src/nvidia/core/NCV.hpp index 79d6caa..ae2a144 100644 --- a/modules/gpu/src/nvidia/core/NCV.hpp +++ b/modules/gpu/src/nvidia/core/NCV.hpp @@ -134,15 +134,24 @@ typedef unsigned char Ncv8u; typedef float Ncv32f; typedef double Ncv64f; - -struct NcvRect8u +struct NcvRect8u__ { Ncv8u x; Ncv8u y; Ncv8u width; Ncv8u height; - __host__ __device__ NcvRect8u() : x(0), y(0), width(0), height(0) {}; - __host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {} +}; + +struct NcvRect8u : NcvRect8u__ +{ + __host__ __device__ NcvRect8u() {} + __host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) + { + x = x; + y = y; + width = width; + height = height; + } }; diff --git a/modules/gpu/test/nvidia/TestRectStdDev.h b/modules/gpu/test/nvidia/TestRectStdDev.h index 7c0473e..f556c19 100644 --- a/modules/gpu/test/nvidia/TestRectStdDev.h +++ b/modules/gpu/test/nvidia/TestRectStdDev.h @@ -33,9 +33,9 @@ private: TestRectStdDev& operator=(const TestRectStdDev&); NCVTestSourceProvider &src; - NcvRect32u rect; Ncv32u width; Ncv32u height; + NcvRect32u rect; Ncv32f scaleFactor; NcvBool bTextureCache; -- 2.7.4