core: fix flag processing for nested regions in cv::parallel_for_()
author Alexander Alekhin <alexander.a.alekhin@gmail.com>
Thu, 10 Aug 2017 08:17:14 +0000 (08:17 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Thu, 10 Aug 2017 08:37:47 +0000 (08:37 +0000)
modules/core/src/parallel.cpp

index 53ebb42..ff1ce9b 100644 (file)
@@ -363,6 +363,10 @@ static SchedPtr pplScheduler;
 
 /* ================================   parallel_for_  ================================ */
 
+#ifdef CV_PARALLEL_FRAMEWORK
+static void parallel_for_impl(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes); // forward declaration
+#endif
+
 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
 {
 #ifdef OPENCV_TRACE
@@ -377,10 +381,35 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
         return;
 
 #ifdef CV_PARALLEL_FRAMEWORK
+    static volatile int flagNestedParallelFor = 0;
+    bool isNotNestedRegion = flagNestedParallelFor == 0;
+    if (isNotNestedRegion)
+      isNotNestedRegion = CV_XADD(&flagNestedParallelFor, 1) == 0;
+    if (isNotNestedRegion)
+    {
+        try
+        {
+            parallel_for_impl(range, body, nstripes);
+            flagNestedParallelFor = 0;
+        }
+        catch (...)
+        {
+            flagNestedParallelFor = 0;
+            throw;
+        }
+    }
+    else // nested parallel_for_() calls are not parallelized
+#endif // CV_PARALLEL_FRAMEWORK
+    {
+        (void)nstripes;
+        body(range);
+    }
+}
 
-    static int flagNestedParallelFor = 0;
-    bool isNotNesterParallelFor = CV_XADD(&flagNestedParallelFor, 1) == 0;
-    if(numThreads != 0 && isNotNesterParallelFor)
+#ifdef CV_PARALLEL_FRAMEWORK
+static void parallel_for_impl(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
+{
+    if ((numThreads < 0 || numThreads > 1) && range.end - range.start > 1)
     {
         ParallelLoopBodyWrapperContext ctx(body, range, nstripes);
         ProxyLoopBody pbody(ctx);
@@ -388,7 +417,6 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
         if( stripeRange.end - stripeRange.start == 1 )
         {
             body(range);
-            flagNestedParallelFor = 0;
             return;
         }
 
@@ -444,16 +472,14 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
 #error You have hacked and compiling with unsupported parallel framework
 
 #endif
-        flagNestedParallelFor = 0;
     }
     else
-
-#endif // CV_PARALLEL_FRAMEWORK
     {
-        (void)nstripes;
         body(range);
     }
 }
+#endif // CV_PARALLEL_FRAMEWORK
+
 
 int cv::getNumThreads(void)
 {