Allocate new memory for optimized concat to prevent collisions.
author: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Thu, 28 Dec 2017 13:04:09 +0000 (16:04 +0300)
committer: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Thu, 28 Dec 2017 13:45:53 +0000 (16:45 +0300)
Add a flag to disable memory reusing in dnn module.

modules/dnn/CMakeLists.txt
modules/dnn/src/dnn.cpp

index 77c6247..dce7ba1 100644 (file)
@@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS)
     endif()
   endif()
 endif()
+
+ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON)
+if (${the_module}_REUSE_MEMORY)
+  add_definitions(-DREUSE_DNN_MEMORY=1)
+endif()
index 10e4b0e..8889d60 100644 (file)
@@ -367,43 +367,42 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
     {
+#ifdef REUSE_DNN_MEMORY
         Mat bestBlob;
         LayerPin bestBlobPin;
 
-        if( !force )
-        {
-            std::map<LayerPin, Mat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
+        std::map<LayerPin, Mat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
 
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
 
-            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+        for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
             {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
+                Mat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
                 {
-                    Mat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
                 }
             }
         }
         if (!bestBlob.empty())
         {
             reuse(bestBlobPin, lp);
-            dst = Mat(shape, CV_32F, bestBlob.data);
+            dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
         }
         else
+#endif  // REUSE_DNN_MEMORY
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
@@ -412,34 +411,32 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
     {
+#ifdef REUSE_DNN_MEMORY
         UMat bestBlob;
         LayerPin bestBlobPin;
 
-        if( !force )
-        {
-            std::map<LayerPin, UMat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
+        std::map<LayerPin, UMat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
 
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
 
-            for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
+        for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
             {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
+                UMat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
                 {
-                    UMat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
                 }
             }
         }
@@ -449,6 +446,7 @@ public:
             umat_dst.create(shape, CV_32F);
         }
         else
+#endif  // REUSE_DNN_MEMORY
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
@@ -458,8 +456,7 @@ public:
     }
 
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
-                               std::vector<LayerPin>& pinsForInternalBlobs,
-                               bool maximizeReuse)
+                               std::vector<LayerPin>& pinsForInternalBlobs)
     {
         CV_TRACE_FUNCTION();
         bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
@@ -530,7 +527,6 @@ public:
         }
 
         std::map<int, std::vector<int> >::reverse_iterator it;
-        bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
         for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
         {
             for(int j = 0; j < it->second.size(); j++)
@@ -539,7 +535,7 @@ public:
                 if (total(shapes[index]))
                 {
                     LayerPin blobPin(ld.id, index);
-                    if (index < outShapes.size() && inPlace && !force)
+                    if (index < outShapes.size() && inPlace)
                     {
                         if (use_umat)
                         {
@@ -558,9 +554,9 @@ public:
                     else
                     {
                         if (use_umat)
-                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
                         else
-                            reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *blobs[index]);
                     }
                 }
             }
@@ -1111,8 +1107,7 @@ struct Net::Impl
         CV_Assert(layerShapesIt != layersShapes.end());
 
         std::vector<LayerPin> pinsForInternalBlobs;
-        bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
-        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
+        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
         for (int i = 0; i < ld.outputBlobs.size(); ++i)
         {
@@ -1415,6 +1410,9 @@ struct Net::Impl
 
                     if( i >= ninputs )
                     {
+                        // Allocate new memory to prevent collisions during memory
+                        // reusing (see https://github.com/opencv/opencv/pull/10456).
+                        output = output.clone();
                         Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
                         int ofs = 0;
                         for( i = 0; i < ninputs; i++ )