//
// Copyright © 2017 Arm Ltd. All rights reserved.
// See LICENSE file in the project root for full license information.
//
#include "LayersFwd.hpp"

#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>

#include <boost/polymorphic_cast.hpp>
#include <boost/log/trivial.hpp>
#include <boost/assert.hpp>
#include <boost/format.hpp>

#include <ostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <DotSerializer.hpp>
24 Graph::Graph(const Graph& other)
25 : m_LayersInOrder(other.m_LayersInOrder)
27 std::unordered_map<const Layer*, Layer*> otherToClonedMap;
29 for (auto&& otherLayer : other.m_Layers)
31 Layer* const layer = otherLayer->Clone(*this);
32 otherToClonedMap.emplace(otherLayer, layer);
35 // Copies slot connections.
36 for (auto&& otherLayer : other.m_Layers)
38 Layer* const thisLayer = otherToClonedMap[otherLayer];
40 auto outputSlot = thisLayer->BeginOutputSlots();
41 for (auto&& otherOutputSlot : otherLayer->GetOutputSlots())
43 for (auto&& otherInputSlot : otherOutputSlot.GetConnections())
45 const Layer& otherTgtLayer = otherInputSlot->GetOwningLayer();
46 Layer* const thisTgtLayer = otherToClonedMap[&otherTgtLayer];
48 InputSlot& inputSlot = thisTgtLayer->GetInputSlot(otherInputSlot->GetSlotIndex());
49 outputSlot->Connect(inputSlot);
51 outputSlot->SetTensorInfo(otherOutputSlot.GetTensorInfo());
57 Status Graph::Print() const
61 BOOST_LOG_TRIVIAL(info) << "\n Graph is empty.\n";
62 return Status::Success;
64 BOOST_LOG_TRIVIAL(info) << "\n";
65 BOOST_LOG_TRIVIAL(info) << "Walking Pattern: \n";
67 for (auto&& it : TopologicalSort())
69 BOOST_LOG_TRIVIAL(info) << it->GetName() << ":" << GetLayerTypeAsCString(it->GetType())
70 << ":" << GetComputeDeviceAsCString(it->GetComputeDevice());
72 BOOST_LOG_TRIVIAL(info) << "\n\n";
74 return Status::Success;
77 Status Graph::SerializeToDot(std::ostream& stream)
80 DotGraph graph(stream, "Optimized");
83 // Default node attributes:
84 DotDefaults nodes(stream, "node");
85 nodes.GetAttributeSet()
86 .AddAttribute("shape", "record");
90 // Default edge attributes:
91 DotDefaults edges(stream, "edge");
92 edges.GetAttributeSet()
93 .AddAttribute("fontsize", 8)
94 .AddAttribute("fontcolor", "blue")
95 .AddAttribute("fontname", "arial-bold");
98 // First declares the nodes.
99 for (auto&& layer : m_Layers)
101 DotNode node(stream, layer->GetGuid(), GetLayerTypeAsCString(layer->GetType()));
102 // Extracts the layer parameters.
103 ParameterStringifyFunction extractParams = [&node](const std::string & name, const std::string & value){
104 node.GetContents().AddContent(name + " : " + value);
106 layer->SerializeLayerParameters(extractParams);
109 // Second declares the edges.
110 for (auto&& layer : m_Layers)
112 LayerGuid toId = layer->GetGuid();
114 for (unsigned int i=0;i<layer->GetNumInputSlots(); i++)
116 OutputSlot* outputSlot = static_cast<OutputSlot*>(layer->GetInputSlot(i).GetConnection());
117 LayerGuid fromId = outputSlot->GetOwningLayer().GetGuid();
118 DotEdge edge(stream, fromId, toId);
120 // Now print the tensor shape on the edge.
122 // Constructs the label attribute with HTML markup.
123 std::stringstream ss;
124 ss << "< " << outputSlot->GetTensorInfo().GetShape() << " >";
125 edge.GetAttributeSet().AddAttribute("label", ss);
133 return Status::Failure;
135 return Status::Success;
138 Status Graph::AllocateDynamicBuffers()
140 // Layers must be sorted in topological order
141 BOOST_ASSERT(m_LayersInOrder);
143 std::unordered_set<const ITensorHandle*> preallocatedTensors;
144 std::unordered_map<const ITensorHandle*, unsigned int> handleReferenceCounts;
146 // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
147 // is a TensorHandle, the function just returns it
148 auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
150 ITensorHandle* ancestor = subTensorHandle;
151 while (ancestor && ancestor->GetParent())
153 ancestor = ancestor->GetParent();
158 // Checks whether a TensorHandle has been pre-allocated
159 auto IsPreallocated = [&](ITensorHandle* const tensorHandle)
161 return tensorHandle && preallocatedTensors.find(tensorHandle) != preallocatedTensors.end();
164 // Constant tensor handles need to last from the beginning of execution till the end,
165 // therefore we pre-allocate them upfront
166 for (auto&& layer : m_Layers)
168 if (layer->GetType() == LayerType::Constant)
170 for (auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
172 ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());
174 if (tensorHandle && !IsPreallocated(tensorHandle))
176 tensorHandle->Allocate();
177 preallocatedTensors.insert(tensorHandle);
183 // Iterate over the network in topological order
184 for (auto&& layer : m_Layers)
186 // Count the amount of times each output slot references a certain buffer (ITensorHandle).
187 // The first time we encounter a new tensor handle, we start managing its lifetime.
188 for (auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
190 ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());
192 if (tensorHandle && !IsPreallocated(tensorHandle))
194 unsigned int numConnections = slot->GetNumConnections();
195 if (handleReferenceCounts.find(tensorHandle) == handleReferenceCounts.end())
197 handleReferenceCounts[tensorHandle] = numConnections;
198 tensorHandle->Manage();
202 handleReferenceCounts[tensorHandle] += numConnections;
207 // Loop through the input slots in the same layer and decrement the reference counter associated
208 // to each tensor handle we encounter. Once it reaches zero, we end the lifetime of the tensor handle
209 for (auto&& slot = layer->BeginInputSlots(); slot != layer->EndInputSlots(); ++slot)
211 ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(
212 slot->GetConnectedOutputSlot()->GetOutputHandler().GetData());
214 if (tensorHandle && !IsPreallocated(tensorHandle))
216 --handleReferenceCounts[tensorHandle];
218 if (handleReferenceCounts[tensorHandle] == 0u)
220 // Stop managing lifetime of tensor handle
221 tensorHandle->Allocate();
222 handleReferenceCounts.erase(tensorHandle);
228 return Status::Success;
231 const Graph& Graph::TopologicalSort() const
233 if (!m_LayersInOrder)
235 // Resets layer order.
236 for (auto&& it : m_Layers)
241 auto compareLayerPriority = [](const LayersList::value_type& layerA, const LayersList::value_type& layerB)
243 return layerA->GetPriority() < layerB->GetPriority();
246 m_Layers.sort(compareLayerPriority);
248 m_LayersInOrder = true;
254 void Graph::AddCopyLayers()
256 // Returns true if the given layer could potentially need an intermediate copy layer (depending on its
257 // connections to other layers). At the time of writing, copy layers will be inserted in the following situations:
258 // CPU -> CL (and viceversa)
259 // CPU -> Neon (and viceversa)
260 auto MayNeedCopyLayer = [](const Layer& layer)
262 // All layers should have been associated with a valid compute device at this point.
263 BOOST_ASSERT(layer.GetComputeDevice() != Compute::Undefined);
264 // Does not need another copy layer if a copy layer is already present.
265 return layer.GetType() != LayerType::MemCopy;
268 for (auto&& srcLayer : m_Layers)
270 if (MayNeedCopyLayer(*srcLayer))
272 unsigned int srcOutputIndex = 0;
273 for (auto&& srcOutput : srcLayer->GetOutputSlots())
275 std::vector<InputSlot*> connectionCopy = srcOutput.GetConnections();
276 for (auto&& dstInput : connectionCopy)
278 Layer& dstLayer = dstInput->GetOwningLayer();
279 if (MayNeedCopyLayer(dstLayer) && (dstLayer.GetComputeDevice() != srcLayer->GetComputeDevice()))
281 // A copy layer is needed in between the source and destination layers.
282 // Record the operation rather than attempting to modify the graph as we go.
283 // (invalidating iterators)
284 const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
285 % srcLayer->GetName()
288 % dstInput->GetSlotIndex());
290 MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInput, copyLayerName.c_str());
291 copyLayer->SetComputeDevice(dstLayer.GetComputeDevice());
300 void Graph::InferTensorInfos()
302 for (auto&& layer : TopologicalSort())
304 for (auto&& input : layer->GetInputSlots())
306 boost::ignore_unused(input);
307 BOOST_ASSERT_MSG(input.GetConnectedOutputSlot()->IsTensorInfoSet(),
308 "All inputs must have the TensorInfo set at this point.");
310 layer->ValidateTensorShapesFromInputs();