return wrapper;
}
- class HalideCompiler : public ParallelLoopBody
- {
- public:
- HalideCompiler(const MapIdToLayerData& layers_, int preferableTarget_)
- : layers(&layers_), preferableTarget(preferableTarget_) {}
-
- void operator()(const Range& r) const
- {
- MapIdToLayerData::const_iterator it = layers->begin();
- for (int i = 0; i < r.start && it != layers->end(); ++i, ++it) {}
- for (int i = r.start; i < r.end && it != layers->end(); ++i, ++it)
- {
- const LayerData &ld = it->second;
- Ptr<Layer> layer = ld.layerInstance;
- bool skip = ld.skipFlags.find(DNN_BACKEND_HALIDE)->second;
- if (layer->supportBackend(DNN_BACKEND_HALIDE) && !skip)
- {
- Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
- dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
- }
- }
- }
- private:
- const MapIdToLayerData* layers;
- int preferableTarget;
- };
-
+#ifdef HAVE_HALIDE
void compileHalide()
{
CV_TRACE_FUNCTION();
CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
HalideScheduler scheduler(halideConfigFile);
- MapIdToLayerData::iterator it;
- for (it = layers.begin(); it != layers.end(); ++it)
+ std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
+ for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
{
LayerData &ld = it->second;
Ptr<Layer> layer = ld.layerInstance;
ld.inputBlobs, ld.outputBlobs,
preferableTarget);
}
+ compileList.emplace_back(ld);
}
}
- parallel_for_(Range(0, layers.size()), HalideCompiler(layers, preferableTarget));
+ std::atomic<int> progress(0);
+ auto fn = ([&] () -> void
+ {
+ for (;;)
+ {
+ int id = progress.fetch_add(1);
+ if ((size_t)id >= compileList.size())
+ return;
+ const LayerData& ld = compileList[id].get();
+ Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
+ dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
+ }
+ });
+ size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
+ num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
+ std::vector<std::thread> threads(num_threads - 1);
+ for (auto& t: threads) t = std::thread(fn);
+ fn(); // process own tasks
+ for (auto& t: threads) t.join();
}
+#endif
void clear()
{
if (!netWasAllocated )
{
- // If user didn't call compileHalide() between
- // setPreferableBackend(DNN_BACKEND_HALIDE) and forward().
+#ifdef HAVE_HALIDE
if (preferableBackend == DNN_BACKEND_HALIDE)
compileHalide();
+#else
+ CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
+#endif
}
netWasAllocated = true;