Allocate new memory for optimized concat to prevent collisions.
author: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Thu, 28 Dec 2017 13:04:09 +0000 (16:04 +0300)
committer: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Thu, 28 Dec 2017 13:45:53 +0000 (16:45 +0300)
Add a flag to disable memory reusing in dnn module.

modules/dnn/CMakeLists.txt
modules/dnn/src/dnn.cpp

index 77c6247..dce7ba1 100644 (file)
@@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS)
     endif()
   endif()
 endif()
+
+ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON)
+if (${the_module}_REUSE_MEMORY)
+  add_definitions(-DREUSE_DNN_MEMORY=1)
+endif()
index 10e4b0e..8889d60 100644 (file)
@@ -367,43 +367,42 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
     {
+#ifdef REUSE_DNN_MEMORY
         Mat bestBlob;
         LayerPin bestBlobPin;
 
-        if( !force )
-        {
-            std::map<LayerPin, Mat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
+        std::map<LayerPin, Mat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
 
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
 
-            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+        for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
             {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
+                Mat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
                 {
-                    Mat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
                 }
             }
         }
         if (!bestBlob.empty())
         {
             reuse(bestBlobPin, lp);
-            dst = Mat(shape, CV_32F, bestBlob.data);
+            dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
         }
         else
+#endif  // REUSE_DNN_MEMORY
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
@@ -412,34 +411,32 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
     {
+#ifdef REUSE_DNN_MEMORY
         UMat bestBlob;
         LayerPin bestBlobPin;
 
-        if( !force )
-        {
-            std::map<LayerPin, UMat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
+        std::map<LayerPin, UMat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
 
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
 
-            for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
+        for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
             {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
+                UMat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
                 {
-                    UMat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
                 }
             }
         }
@@ -449,6 +446,7 @@ public:
             umat_dst.create(shape, CV_32F);
         }
         else
+#endif  // REUSE_DNN_MEMORY
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
@@ -458,8 +456,7 @@ public:
     }
 
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
-                               std::vector<LayerPin>& pinsForInternalBlobs,
-                               bool maximizeReuse)
+                               std::vector<LayerPin>& pinsForInternalBlobs)
     {
         CV_TRACE_FUNCTION();
         bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
@@ -530,7 +527,6 @@ public:
         }
 
         std::map<int, std::vector<int> >::reverse_iterator it;
-        bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
         for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
         {
             for(int j = 0; j < it->second.size(); j++)
@@ -539,7 +535,7 @@ public:
                 if (total(shapes[index]))
                 {
                     LayerPin blobPin(ld.id, index);
-                    if (index < outShapes.size() && inPlace && !force)
+                    if (index < outShapes.size() && inPlace)
                     {
                         if (use_umat)
                         {
@@ -558,9 +554,9 @@ public:
                     else
                     {
                         if (use_umat)
-                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
                         else
-                            reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *blobs[index]);
                     }
                 }
             }
@@ -1111,8 +1107,7 @@ struct Net::Impl
         CV_Assert(layerShapesIt != layersShapes.end());
 
         std::vector<LayerPin> pinsForInternalBlobs;
-        bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
-        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
+        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
         for (int i = 0; i < ld.outputBlobs.size(); ++i)
         {
@@ -1415,6 +1410,9 @@ struct Net::Impl
 
                     if( i >= ninputs )
                     {
+                        // Allocate new memory to prevent collisions during memory
+                        // reusing (see https://github.com/opencv/opencv/pull/10456).
+                        output = output.clone();
                         Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
                         int ofs = 0;
                         for( i = 0; i < ninputs; i++ )