From: Vladislav Vinogradov Date: Tue, 27 Aug 2013 07:21:41 +0000 (+0400) Subject: gpu TVL1 Optical Flow optimization: X-Git-Tag: accepted/tizen/ivi/20140515.103456~1^2~541^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bff0fad6c3ad72c83202c9501989c3951d3851b8;p=profile%2Fivi%2Fopencv.git gpu TVL1 Optical Flow optimization: do not calculate the sum of errors in every iteration; instead, the error is summed only on every 2nd iteration, or even less often if the previous sum of errors is still too far away from the threshold. --- diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index f0f3397..9894f7f 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -427,8 +427,8 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1, TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v); - GPU_SANITY_CHECK(u, 1e-2); - GPU_SANITY_CHECK(v, 1e-2); + GPU_SANITY_CHECK(u, 1e-1); + GPU_SANITY_CHECK(v, 1e-1); } else { diff --git a/modules/gpu/src/cuda/tvl1flow.cu b/modules/gpu/src/cuda/tvl1flow.cu index a2b934b..8383a5d 100644 --- a/modules/gpu/src/cuda/tvl1flow.cu +++ b/modules/gpu/src/cuda/tvl1flow.cu @@ -211,7 +211,7 @@ namespace tvl1flow const PtrStepf grad, const PtrStepf rho_c, const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22, PtrStepf u1, PtrStepf u2, PtrStepf error, - const float l_t, const float theta) + const float l_t, const float theta, const bool calcError) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -265,21 +265,24 @@ namespace tvl1flow u1(y, x) = u1NewVal; u2(y, x) = u2NewVal; - const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); - const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); - error(y, x) = n1 + n2; + if (calcError) + { + const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + error(y, x) = n1 + n2; + } } void estimateU(PtrStepSzf 
I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho_c, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, - float l_t, float theta) + float l_t, float theta, bool calcError) { const dim3 block(32, 8); const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y)); - estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta); + estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta, calcError); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); diff --git a/modules/gpu/src/tvl1flow.cpp b/modules/gpu/src/tvl1flow.cpp index 9971324..1916d1b 100644 --- a/modules/gpu/src/tvl1flow.cpp +++ b/modules/gpu/src/tvl1flow.cpp @@ -173,7 +173,7 @@ namespace tvl1flow PtrStepSzf grad, PtrStepSzf rho_c, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, - float l_t, float theta); + float l_t, float theta, bool calcError); void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut); } @@ -218,12 +218,24 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const Gpu warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); double error = numeric_limits<double>::max(); + double prevError = 0.0; for (int n = 0; error > scaledEpsilon && n < iterations; ++n) { - estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta)); + // some tweaks to make sum operation less frequently + bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon); - if (epsilon > 0) + estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta), calcError); + + if (calcError) + { error = gpu::sum(diff, norm_buf)[0]; + prevError = error; + } + else + { + error = numeric_limits<double>::max(); + prevError -= scaledEpsilon; + } 
estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); }