dnn(halide): don't compile Halide via parallel_for_()
authorAlexander Alekhin <alexander.alekhin@intel.com>
Tue, 10 Oct 2017 14:52:55 +0000 (17:52 +0300)
committerAlexander Alekhin <alexander.alekhin@intel.com>
Tue, 10 Oct 2017 15:06:03 +0000 (18:06 +0300)
To avoid problems with the reduced stack size of inner threads.

modules/dnn/src/dnn.cpp

index 424e842..a14d767 100644 (file)
@@ -589,33 +589,7 @@ struct Net::Impl
         return wrapper;
     }
 
-    class HalideCompiler : public ParallelLoopBody
-    {
-    public:
-        HalideCompiler(const MapIdToLayerData& layers_, int preferableTarget_)
-            : layers(&layers_), preferableTarget(preferableTarget_) {}
-
-        void operator()(const Range& r) const
-        {
-            MapIdToLayerData::const_iterator it = layers->begin();
-            for (int i = 0; i < r.start && it != layers->end(); ++i, ++it) {}
-            for (int i = r.start; i < r.end && it != layers->end(); ++i, ++it)
-            {
-                const LayerData &ld = it->second;
-                Ptr<Layer> layer = ld.layerInstance;
-                bool skip = ld.skipFlags.find(DNN_BACKEND_HALIDE)->second;
-                if (layer->supportBackend(DNN_BACKEND_HALIDE) && !skip)
-                {
-                    Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
-                    dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
-                }
-            }
-        }
-    private:
-        const MapIdToLayerData* layers;
-        int preferableTarget;
-    };
-
+#ifdef HAVE_HALIDE
     void compileHalide()
     {
         CV_TRACE_FUNCTION();
@@ -623,8 +597,8 @@ struct Net::Impl
         CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
 
         HalideScheduler scheduler(halideConfigFile);
-        MapIdToLayerData::iterator it;
-        for (it = layers.begin(); it != layers.end(); ++it)
+        std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
         {
             LayerData &ld = it->second;
             Ptr<Layer> layer = ld.layerInstance;
@@ -639,10 +613,30 @@ struct Net::Impl
                                                 ld.inputBlobs, ld.outputBlobs,
                                                 preferableTarget);
                 }
+                compileList.emplace_back(ld);
             }
         }
-        parallel_for_(Range(0, layers.size()), HalideCompiler(layers, preferableTarget));
+        std::atomic<int> progress(0);
+        auto fn = ([&] () -> void
+        {
+            for (;;)
+            {
+                int id = progress.fetch_add(1);
+                if ((size_t)id >= compileList.size())
+                    return;
+                const LayerData& ld = compileList[id].get();
+                Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
+                dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
+            }
+        });
+        size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
+        num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
+        std::vector<std::thread> threads(num_threads - 1);
+        for (auto& t: threads) t = std::thread(fn);
+        fn(); // process own tasks
+        for (auto& t: threads) t.join();
     }
+#endif
 
     void clear()
     {
@@ -692,10 +686,12 @@ struct Net::Impl
 
             if (!netWasAllocated )
             {
-                // If user didn't call compileHalide() between
-                // setPreferableBackend(DNN_BACKEND_HALIDE) and forward().
+#ifdef HAVE_HALIDE
                 if (preferableBackend == DNN_BACKEND_HALIDE)
                     compileHalide();
+#else
+                CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
+#endif
             }
 
             netWasAllocated = true;