IVGCVSW-4229 Fix Intermittent failures in ExternalProfiling
[platform/upstream/armnn.git] / src / profiling / SendCounterPacket.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "SendCounterPacket.hpp"
7 #include "EncodeVersion.hpp"
8 #include "ProfilingUtils.hpp"
9
10 #include <armnn/Exceptions.hpp>
11 #include <armnn/Conversion.hpp>
12
13 #include <boost/format.hpp>
14 #include <boost/numeric/conversion/cast.hpp>
15 #include <boost/core/ignore_unused.hpp>
16
17 #include <cstring>
18
19 namespace armnn
20 {
21
22 namespace profiling
23 {
24
25 using boost::numeric_cast;
26
27 const unsigned int SendCounterPacket::PIPE_MAGIC;
28
29 void SendCounterPacket::SendStreamMetaDataPacket()
30 {
31     std::string info(GetSoftwareInfo());
32     std::string hardwareVersion(GetHardwareVersion());
33     std::string softwareVersion(GetSoftwareVersion());
34     std::string processName = GetProcessName().substr(0, 60);
35
36     uint32_t infoSize = numeric_cast<uint32_t>(info.size()) > 0 ? numeric_cast<uint32_t>(info.size()) + 1 : 0;
37     uint32_t hardwareVersionSize = numeric_cast<uint32_t>(hardwareVersion.size()) > 0 ?
38                                    numeric_cast<uint32_t>(hardwareVersion.size()) + 1 : 0;
39     uint32_t softwareVersionSize = numeric_cast<uint32_t>(softwareVersion.size()) > 0 ?
40                                    numeric_cast<uint32_t>(softwareVersion.size()) + 1 : 0;
41     uint32_t processNameSize = numeric_cast<uint32_t>(processName.size()) > 0 ?
42                                numeric_cast<uint32_t>(processName.size()) + 1 : 0;
43
44     uint32_t sizeUint32 = numeric_cast<uint32_t>(sizeof(uint32_t));
45
46     uint32_t headerSize = 2 * sizeUint32;
47     uint32_t bodySize = 10 * sizeUint32;
48     uint32_t packetVersionCountSize = sizeUint32;
49
50     // Supported Packets
51     // Stream metadata packet            (packet family=0; packet id=0)
52     // Connection Acknowledged packet    (packet family=0, packet id=1)
53     // Counter Directory packet          (packet family=0; packet id=2)
54     // Request Counter Directory packet  (packet family=0, packet id=3)
55     // Periodic Counter Selection packet (packet family=0, packet id=4)
56     // Periodic Counter Capture packet   (packet family=1, packet class=0, type=0)
57     uint32_t packetVersionEntries = 6;
58
59     uint32_t payloadSize = numeric_cast<uint32_t>(infoSize + hardwareVersionSize + softwareVersionSize +
60                                                   processNameSize + packetVersionCountSize +
61                                                   (packetVersionEntries * 2 * sizeUint32));
62
63     uint32_t totalSize = headerSize + bodySize + payloadSize;
64     uint32_t offset = 0;
65     uint32_t reserved = 0;
66
67     IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
68
69     if (writeBuffer == nullptr || reserved < totalSize)
70     {
71         CancelOperationAndThrow<BufferExhaustion>(
72             writeBuffer,
73             boost::str(boost::format("No space left in buffer. Unable to reserve (%1%) bytes.") % totalSize));
74     }
75
76     try
77     {
78         // Create header
79
80         WriteUint32(writeBuffer, offset, 0);
81         offset += sizeUint32;
82         WriteUint32(writeBuffer, offset, totalSize - headerSize);
83
84         // Packet body
85
86         offset += sizeUint32;
87         WriteUint32(writeBuffer, offset, PIPE_MAGIC); // pipe_magic
88         offset += sizeUint32;
89         WriteUint32(writeBuffer, offset, EncodeVersion(1, 0, 0)); // stream_metadata_version
90         offset += sizeUint32;
91         WriteUint32(writeBuffer, offset, MAX_METADATA_PACKET_LENGTH); // max_data_length
92         offset += sizeUint32;
93         WriteUint32(writeBuffer, offset, numeric_cast<uint32_t>(getpid())); // pid
94         offset += sizeUint32;
95         uint32_t poolOffset = bodySize;
96         WriteUint32(writeBuffer, offset, infoSize ? poolOffset : 0); // offset_info
97         offset += sizeUint32;
98         poolOffset += infoSize;
99         WriteUint32(writeBuffer, offset, hardwareVersionSize ? poolOffset : 0); // offset_hw_version
100         offset += sizeUint32;
101         poolOffset += hardwareVersionSize;
102         WriteUint32(writeBuffer, offset, softwareVersionSize ? poolOffset : 0); // offset_sw_version
103         offset += sizeUint32;
104         poolOffset += softwareVersionSize;
105         WriteUint32(writeBuffer, offset, processNameSize ? poolOffset : 0); // offset_process_name
106         offset += sizeUint32;
107         poolOffset += processNameSize;
108         WriteUint32(writeBuffer, offset, packetVersionEntries ? poolOffset : 0); // offset_packet_version_table
109         offset += sizeUint32;
110         WriteUint32(writeBuffer, offset, 0); // reserved
111         offset += sizeUint32;
112
113         // Pool
114
115         if (infoSize)
116         {
117             memcpy(&writeBuffer->GetWritableData()[offset], info.c_str(), infoSize);
118             offset += infoSize;
119         }
120
121         if (hardwareVersionSize)
122         {
123             memcpy(&writeBuffer->GetWritableData()[offset], hardwareVersion.c_str(), hardwareVersionSize);
124             offset += hardwareVersionSize;
125         }
126
127         if (softwareVersionSize)
128         {
129             memcpy(&writeBuffer->GetWritableData()[offset], softwareVersion.c_str(), softwareVersionSize);
130             offset += softwareVersionSize;
131         }
132
133         if (processNameSize)
134         {
135             memcpy(&writeBuffer->GetWritableData()[offset], processName.c_str(), processNameSize);
136             offset += processNameSize;
137         }
138
139         if (packetVersionEntries)
140         {
141             // Packet Version Count
142             WriteUint32(writeBuffer, offset, packetVersionEntries << 16);
143
144             // Packet Version Entries
145             uint32_t packetFamily = 0;
146             uint32_t packetId = 0;
147
148             offset += sizeUint32;
149             for (uint32_t i = 0; i < packetVersionEntries - 1; ++i)
150             {
151                 WriteUint32(writeBuffer, offset, ((packetFamily & 0x3F) << 26) | ((packetId++ & 0x3FF) << 16));
152                 offset += sizeUint32;
153                 WriteUint32(writeBuffer, offset, EncodeVersion(1, 0, 0));
154                 offset += sizeUint32;
155             }
156
157             packetFamily = 1;
158             packetId = 0;
159
160             WriteUint32(writeBuffer, offset, ((packetFamily & 0x3F) << 26) | ((packetId & 0x3FF) << 16));
161             offset += sizeUint32;
162             WriteUint32(writeBuffer, offset, EncodeVersion(1, 0, 0));
163         }
164     }
165     catch(...)
166     {
167         CancelOperationAndThrow<RuntimeException>(writeBuffer, "Error processing packet.");
168     }
169
170     m_BufferManager.Commit(writeBuffer, totalSize);
171 }
172
173 bool SendCounterPacket::CreateCategoryRecord(const CategoryPtr& category,
174                                              const Counters& counters,
175                                              CategoryRecord& categoryRecord,
176                                              std::string& errorMessage)
177 {
178     using namespace boost::numeric;
179
180     BOOST_ASSERT(category);
181
182     const std::string& categoryName = category->m_Name;
183     const std::vector<uint16_t> categoryCounters = category->m_Counters;
184     uint16_t deviceUid = category->m_DeviceUid;
185     uint16_t counterSetUid = category->m_CounterSetUid;
186
187     BOOST_ASSERT(!categoryName.empty());
188
189     // Utils
190     size_t uint32_t_size = sizeof(uint32_t);
191
192     // Category record word 0:
193     // 16:31 [16] device: the uid of a device element which identifies some hardware device that
194     //                    the category belongs to
195     // 0:15  [16] counter_set: the uid of a counter_set the category is associated with
196     uint32_t categoryRecordWord0 = (static_cast<uint32_t>(deviceUid) << 16) |
197                                    (static_cast<uint32_t>(counterSetUid));
198
199     // Category record word 1:
200     // 16:31 [16] event_count: number of events belonging to this category
201     // 0:15  [16] reserved: all zeros
202     uint32_t categoryRecordWord1 = static_cast<uint32_t>(categoryCounters.size()) << 16;
203
204     // Category record word 2:
205     // 0:31 [32] event_pointer_table_offset: offset from the beginning of the category data pool to
206     //                                       the event_pointer_table
207     uint32_t categoryRecordWord2 = 0; // The offset is always zero here, as the event pointer table field is always
208                                       // the first item in the pool
209
210     // Convert the device name into a SWTrace namestring
211     std::vector<uint32_t> categoryNameBuffer;
212     if (!StringToSwTraceString<SwTraceNameCharPolicy>(categoryName, categoryNameBuffer))
213     {
214         errorMessage = boost::str(boost::format("Cannot convert the name of category \"%1%\" to an SWTrace namestring")
215                                   % categoryName);
216         return false;
217     }
218
219     // Process the event records
220     size_t counterCount = categoryCounters.size();
221     std::vector<EventRecord> eventRecords(counterCount);
222     std::vector<uint32_t> eventRecordOffsets(counterCount, 0);
223     size_t eventRecordsSize = 0;
224     uint32_t eventRecordsOffset =
225             numeric_cast<uint32_t>((eventRecords.size() + categoryNameBuffer.size()) * uint32_t_size);
226     for (size_t counterIndex = 0, eventRecordIndex = 0, eventRecordOffsetIndex = 0;
227          counterIndex < counterCount;
228          counterIndex++, eventRecordIndex++, eventRecordOffsetIndex++)
229     {
230         uint16_t counterUid = categoryCounters.at(counterIndex);
231         auto it = counters.find(counterUid);
232         BOOST_ASSERT(it != counters.end());
233         const CounterPtr& counter = it->second;
234
235         EventRecord& eventRecord = eventRecords.at(eventRecordIndex);
236         if (!CreateEventRecord(counter, eventRecord, errorMessage))
237         {
238             return false;
239         }
240
241         // Update the total size in words of the event records
242         eventRecordsSize += eventRecord.size();
243
244         // Add the event record offset to the event pointer table offset field
245         eventRecordOffsets[eventRecordOffsetIndex] = eventRecordsOffset;
246         eventRecordsOffset += numeric_cast<uint32_t>(eventRecord.size() * uint32_t_size);
247     }
248
249     // Category record word 3:
250     // 0:31 [32] name_offset (offset from the beginning of the category data pool to the name field)
251     uint32_t categoryRecordWord3 = numeric_cast<uint32_t>(eventRecordOffsets.size() * uint32_t_size);
252
253     // Calculate the size in words of the category record
254     size_t categoryRecordSize = 4u + // The size of the fixed part (device + counter_set + event_count + reserved +
255                                      // event_pointer_table_offset + name_offset)
256                                 eventRecordOffsets.size() + // The size of the variable part (the event pointer table +
257                                 categoryNameBuffer.size() + // and the category name including the null-terminator +
258                                 eventRecordsSize;           // the event records)
259
260     // Allocate the necessary space for the category record
261     categoryRecord.resize(categoryRecordSize);
262
263     ARMNN_NO_CONVERSION_WARN_BEGIN
264     // Create the category record
265     categoryRecord[0] = categoryRecordWord0; // device + counter_set
266     categoryRecord[1] = categoryRecordWord1; // event_count + reserved
267     categoryRecord[2] = categoryRecordWord2; // event_pointer_table_offset
268     categoryRecord[3] = categoryRecordWord3; // name_offset
269     auto offset = categoryRecord.begin() + 4u;
270     std::copy(eventRecordOffsets.begin(), eventRecordOffsets.end(), offset); // event_pointer_table
271     offset += eventRecordOffsets.size();
272     std::copy(categoryNameBuffer.begin(), categoryNameBuffer.end(), offset); // name
273     offset += categoryNameBuffer.size();
274     for (const EventRecord& eventRecord : eventRecords)
275     {
276         std::copy(eventRecord.begin(), eventRecord.end(), offset); // event_record
277         offset += eventRecord.size();
278     }
279     ARMNN_NO_CONVERSION_WARN_END
280
281     return true;
282 }
283
284 bool SendCounterPacket::CreateDeviceRecord(const DevicePtr& device,
285                                            DeviceRecord& deviceRecord,
286                                            std::string& errorMessage)
287 {
288     BOOST_ASSERT(device);
289
290     uint16_t deviceUid = device->m_Uid;
291     const std::string& deviceName = device->m_Name;
292     uint16_t deviceCores = device->m_Cores;
293
294     BOOST_ASSERT(!deviceName.empty());
295
296     // Device record word 0:
297     // 16:31 [16] uid: the unique identifier for the device
298     // 0:15  [16] cores: the number of individual streams of counters for one or more cores of some device
299     uint32_t deviceRecordWord0 = (static_cast<uint32_t>(deviceUid) << 16) |
300                                  (static_cast<uint32_t>(deviceCores));
301
302     // Device record word 1:
303     // 0:31 [32] name_offset: offset from the beginning of the device record pool to the name field
304     uint32_t deviceRecordWord1 = 0; // The offset is always zero here, as the name field is always
305                                     // the first (and only) item in the pool
306
307     // Convert the device name into a SWTrace string
308     std::vector<uint32_t> deviceNameBuffer;
309     if (!StringToSwTraceString<SwTraceCharPolicy>(deviceName, deviceNameBuffer))
310     {
311         errorMessage = boost::str(boost::format("Cannot convert the name of device %1% (\"%2%\") to an SWTrace string")
312                                   % deviceUid
313                                   % deviceName);
314         return false;
315     }
316
317     // Calculate the size in words of the device record
318     size_t deviceRecordSize = 2u + // The size of the fixed part (uid + cores + name_offset)
319                               deviceNameBuffer.size(); // The size of the variable part (the device name including
320                                                        // the null-terminator)
321
322     // Allocate the necessary space for the device record
323     deviceRecord.resize(deviceRecordSize);
324
325     // Create the device record
326     deviceRecord[0] = deviceRecordWord0; // uid + core
327     deviceRecord[1] = deviceRecordWord1; // name_offset
328     auto offset = deviceRecord.begin() + 2u;
329     std::copy(deviceNameBuffer.begin(), deviceNameBuffer.end(), offset); // name
330
331     return true;
332 }
333
334 bool SendCounterPacket::CreateCounterSetRecord(const CounterSetPtr& counterSet,
335                                                CounterSetRecord& counterSetRecord,
336                                                std::string& errorMessage)
337 {
338     BOOST_ASSERT(counterSet);
339
340     uint16_t counterSetUid = counterSet->m_Uid;
341     const std::string& counterSetName = counterSet->m_Name;
342     uint16_t counterSetCount = counterSet->m_Count;
343
344     BOOST_ASSERT(!counterSetName.empty());
345
346     // Counter set record word 0:
347     // 16:31 [16] uid: the unique identifier for the counter_set
348     // 0:15  [16] count: the number of counters which can be active in this set at any one time
349     uint32_t counterSetRecordWord0 = (static_cast<uint32_t>(counterSetUid) << 16) |
350                                      (static_cast<uint32_t>(counterSetCount));
351
352     // Counter set record word 1:
353     // 0:31 [32] name_offset: offset from the beginning of the counter set pool to the name field
354     uint32_t counterSetRecordWord1 = 0; // The offset is always zero here, as the name field is always
355                                         // the first (and only) item in the pool
356
357     // Convert the device name into a SWTrace namestring
358     std::vector<uint32_t> counterSetNameBuffer;
359     if (!StringToSwTraceString<SwTraceNameCharPolicy>(counterSet->m_Name, counterSetNameBuffer))
360     {
361         errorMessage = boost::str(boost::format("Cannot convert the name of counter set %1% (\"%2%\") to "
362                                                 "an SWTrace namestring")
363                                   % counterSetUid
364                                   % counterSetName);
365         return false;
366     }
367
368     // Calculate the size in words of the counter set record
369     size_t counterSetRecordSize = 2u + // The size of the fixed part (uid + cores + name_offset)
370                                   counterSetNameBuffer.size(); // The size of the variable part (the counter set name
371                                                                // including the null-terminator)
372
373     // Allocate the space for the counter set record
374     counterSetRecord.resize(counterSetRecordSize);
375
376     // Create the counter set record
377     counterSetRecord[0] = counterSetRecordWord0; // uid + core
378     counterSetRecord[1] = counterSetRecordWord1; // name_offset
379     auto offset = counterSetRecord.begin() + 2u;
380     std::copy(counterSetNameBuffer.begin(), counterSetNameBuffer.end(), offset); // name
381
382     return true;
383 }
384
385 bool SendCounterPacket::CreateEventRecord(const CounterPtr& counter,
386                                           EventRecord& eventRecord,
387                                           std::string& errorMessage)
388 {
389     using namespace boost::numeric;
390
391     BOOST_ASSERT(counter);
392
393     uint16_t           counterUid           = counter->m_Uid;
394     uint16_t           maxCounterUid        = counter->m_MaxCounterUid;
395     uint16_t           deviceUid            = counter->m_DeviceUid;
396     uint16_t           counterSetUid        = counter->m_CounterSetUid;
397     uint16_t           counterClass         = counter->m_Class;
398     uint16_t           counterInterpolation = counter->m_Interpolation;
399     double             counterMultiplier    = counter->m_Multiplier;
400     const std::string& counterName          = counter->m_Name;
401     const std::string& counterDescription   = counter->m_Description;
402     const std::string& counterUnits         = counter->m_Units;
403
404     BOOST_ASSERT(counterClass == 0 || counterClass == 1);
405     BOOST_ASSERT(counterInterpolation == 0 || counterInterpolation == 1);
406     BOOST_ASSERT(counterMultiplier);
407
408     // Utils
409     size_t uint32_t_size = sizeof(uint32_t);
410
411     // Event record word 0:
412     // 16:31 [16] max_counter_uid: if the device this event is associated with has more than one core and there
413     //                             is one of these counters per core this value will be set to
414     //                             (counter_uid + cores (from device_record)) - 1.
415     //                             If there is only a single core then this value will be the same as
416     //                             the counter_uid value
417     // 0:15  [16] count_uid: unique ID for the counter. Must be unique across all counters in all categories
418     uint32_t eventRecordWord0 = (static_cast<uint32_t>(maxCounterUid) << 16) |
419                                 (static_cast<uint32_t>(counterUid));
420
421     // Event record word 1:
422     // 16:31 [16] device: UID of the device this event is associated with. Set to zero if the event is NOT
423     //                    associated with a device
424     // 0:15  [16] counter_set: UID of the counter_set this event is associated with. Set to zero if the event
425     //                         is NOT associated with a counter_set
426     uint32_t eventRecordWord1 = (static_cast<uint32_t>(deviceUid) << 16) |
427                                 (static_cast<uint32_t>(counterSetUid));
428
429     // Event record word 2:
430     // 16:31 [16] class: type describing how to treat each data point in a stream of data points
431     // 0:15  [16] interpolation: type describing how to interpolate each data point in a stream of data points
432     uint32_t eventRecordWord2 = (static_cast<uint32_t>(counterClass) << 16) |
433                                 (static_cast<uint32_t>(counterInterpolation));
434
435     // Event record word 3-4:
436     // 0:63 [64] multiplier: internal data stream is represented as integer values, this allows scaling of
437     //                       those values as if they are fixed point numbers. Zero is not a valid value
438     uint32_t multiplier[2] = { 0u, 0u };
439     BOOST_ASSERT(sizeof(counterMultiplier) == sizeof(multiplier));
440     std::memcpy(multiplier, &counterMultiplier, sizeof(multiplier));
441     uint32_t eventRecordWord3 = multiplier[0];
442     uint32_t eventRecordWord4 = multiplier[1];
443
444     // Event record word 5:
445     // 0:31 [32] name_offset: offset from the beginning of the event record pool to the name field
446     uint32_t eventRecordWord5 = 0; // The offset is always zero here, as the name field is always
447                                    // the first item in the pool
448
449     // Convert the counter name into a SWTrace string
450     std::vector<uint32_t> counterNameBuffer;
451     if (!StringToSwTraceString<SwTraceCharPolicy>(counterName, counterNameBuffer))
452     {
453         errorMessage = boost::str(boost::format("Cannot convert the name of counter %1% (name: \"%2%\") "
454                                                 "to an SWTrace string")
455                                   % counterUid
456                                   % counterName);
457         return false;
458     }
459
460     // Event record word 6:
461     // 0:31 [32] description_offset: offset from the beginning of the event record pool to the description field
462     // The size of the name buffer in bytes
463     uint32_t eventRecordWord6 = numeric_cast<uint32_t>(counterNameBuffer.size() * uint32_t_size);
464
465     // Convert the counter description into a SWTrace string
466     std::vector<uint32_t> counterDescriptionBuffer;
467     if (!StringToSwTraceString<SwTraceCharPolicy>(counterDescription, counterDescriptionBuffer))
468     {
469         errorMessage = boost::str(boost::format("Cannot convert the description of counter %1% (description: \"%2%\") "
470                                                 "to an SWTrace string")
471                                   % counterUid
472                                   % counterName);
473         return false;
474     }
475
476     // Event record word 7:
477     // 0:31 [32] units_offset: (optional) offset from the beginning of the event record pool to the units field.
478     //                         An offset value of zero indicates this field is not provided
479     bool includeUnits = !counterUnits.empty();
480     // The size of the description buffer in bytes
481     uint32_t eventRecordWord7 = includeUnits ?
482                                 eventRecordWord6 +
483                                 numeric_cast<uint32_t>(counterDescriptionBuffer.size() * uint32_t_size) :
484                                 0;
485
486     // Convert the counter units into a SWTrace namestring (optional)
487     std::vector<uint32_t> counterUnitsBuffer;
488     if (includeUnits)
489     {
490         // Convert the counter units into a SWTrace namestring
491         if (!StringToSwTraceString<SwTraceNameCharPolicy>(counterUnits, counterUnitsBuffer))
492         {
493             errorMessage = boost::str(boost::format("Cannot convert the units of counter %1% (units: \"%2%\") "
494                                                     "to an SWTrace string")
495                                       % counterUid
496                                       % counterName);
497             return false;
498         }
499     }
500
501     // Calculate the size in words of the event record
502     size_t eventRecordSize = 8u + // The size of the fixed part (counter_uid + max_counter_uid + device +
503                                   //                             counter_set + class + interpolation +
504                                   //                             multiplier + name_offset + description_offset +
505                                   //                             units_offset)
506                              counterNameBuffer.size() +        // The size of the variable part (the counter name,
507                              counterDescriptionBuffer.size() + // description and units including the null-terminator)
508                              counterUnitsBuffer.size();
509
510     // Allocate the space for the event record
511     eventRecord.resize(eventRecordSize);
512
513     ARMNN_NO_CONVERSION_WARN_BEGIN
514     // Create the event record
515     eventRecord[0] = eventRecordWord0; // max_counter_uid + counter_uid
516     eventRecord[1] = eventRecordWord1; // device + counter_set
517     eventRecord[2] = eventRecordWord2; // class + interpolation
518     eventRecord[3] = eventRecordWord3; // multiplier
519     eventRecord[4] = eventRecordWord4; // multiplier
520     eventRecord[5] = eventRecordWord5; // name_offset
521     eventRecord[6] = eventRecordWord6; // description_offset
522     eventRecord[7] = eventRecordWord7; // units_offset
523     auto offset = eventRecord.begin() + 8u;
524     std::copy(counterNameBuffer.begin(), counterNameBuffer.end(), offset); // name
525     offset += counterNameBuffer.size();
526     std::copy(counterDescriptionBuffer.begin(), counterDescriptionBuffer.end(), offset); // description
527     if (includeUnits)
528     {
529         offset += counterDescriptionBuffer.size();
530         std::copy(counterUnitsBuffer.begin(), counterUnitsBuffer.end(), offset); // units
531     }
532     ARMNN_NO_CONVERSION_WARN_END
533
534     return true;
535 }
536
537 void SendCounterPacket::SendCounterDirectoryPacket(const ICounterDirectory& counterDirectory)
538 {
539     using namespace boost::numeric;
540
541     // Get the amount of data that needs to be put into the packet
542     uint16_t categoryCount    = counterDirectory.GetCategoryCount();
543     uint16_t deviceCount      = counterDirectory.GetDeviceCount();
544     uint16_t counterSetCount  = counterDirectory.GetCounterSetCount();
545
546     // Utils
547     size_t uint32_t_size = sizeof(uint32_t);
548     size_t packetHeaderSize = 2u;
549     size_t bodyHeaderSize = 6u;
550
551     // Initialize the offset for the pointer tables
552     uint32_t pointerTableOffset = 0;
553
554     // --------------
555     // Device records
556     // --------------
557
558     // Process device records
559     std::vector<DeviceRecord> deviceRecords(deviceCount);
560     const Devices& devices = counterDirectory.GetDevices();
561     std::vector<uint32_t> deviceRecordOffsets(deviceCount, 0); // device_records_pointer_table
562     size_t deviceRecordsSize = 0;
563     size_t deviceIndex = 0;
564     size_t deviceRecordOffsetIndex = 0;
565     for (auto it = devices.begin(); it != devices.end(); it++)
566     {
567         const DevicePtr& device = it->second;
568         DeviceRecord& deviceRecord = deviceRecords.at(deviceIndex);
569
570         std::string errorMessage;
571         if (!CreateDeviceRecord(device, deviceRecord, errorMessage))
572         {
573             CancelOperationAndThrow<RuntimeException>(errorMessage);
574         }
575
576         // Update the total size in words of the device records
577         deviceRecordsSize += deviceRecord.size();
578
579         // Add the device record offset to the device records pointer table offset field
580         deviceRecordOffsets[deviceRecordOffsetIndex] = pointerTableOffset;
581         pointerTableOffset += numeric_cast<uint32_t>(deviceRecord.size() * uint32_t_size);
582
583         deviceIndex++;
584         deviceRecordOffsetIndex++;
585     }
586
587     // -------------------
588     // Counter set records
589     // -------------------
590
591     // Process counter set records
592     std::vector<CounterSetRecord> counterSetRecords(counterSetCount);
593     const CounterSets& counterSets = counterDirectory.GetCounterSets();
594     std::vector<uint32_t> counterSetRecordOffsets(counterSetCount, 0); // counter_set_records_pointer_table
595     size_t counterSetRecordsSize = 0;
596     size_t counterSetIndex = 0;
597     size_t counterSetRecordOffsetIndex = 0;
598     for (auto it = counterSets.begin(); it != counterSets.end(); it++)
599     {
600         const CounterSetPtr& counterSet = it->second;
601         CounterSetRecord& counterSetRecord = counterSetRecords.at(counterSetIndex);
602
603         std::string errorMessage;
604         if (!CreateCounterSetRecord(counterSet, counterSetRecord, errorMessage))
605         {
606             CancelOperationAndThrow<RuntimeException>(errorMessage);
607         }
608
609         // Update the total size in words of the counter set records
610         counterSetRecordsSize += counterSetRecord.size();
611
612         // Add the counter set record offset to the counter set records pointer table offset field
613         counterSetRecordOffsets[counterSetRecordOffsetIndex] = pointerTableOffset;
614         pointerTableOffset += numeric_cast<uint32_t>(counterSetRecord.size() * uint32_t_size);
615
616         counterSetIndex++;
617         counterSetRecordOffsetIndex++;
618     }
619
620     // ----------------
621     // Category records
622     // ----------------
623
624     // Process category records
625     std::vector<CategoryRecord> categoryRecords(categoryCount);
626     const Categories& categories = counterDirectory.GetCategories();
627     std::vector<uint32_t> categoryRecordOffsets(categoryCount, 0); // category_records_pointer_table
628     size_t categoryRecordsSize = 0;
629     size_t categoryIndex = 0;
630     size_t categoryRecordOffsetIndex = 0;
631     for (auto it = categories.begin(); it != categories.end(); it++)
632     {
633         const CategoryPtr& category = *it;
634         CategoryRecord& categoryRecord = categoryRecords.at(categoryIndex);
635
636         std::string errorMessage;
637         if (!CreateCategoryRecord(category, counterDirectory.GetCounters(), categoryRecord, errorMessage))
638         {
639             CancelOperationAndThrow<RuntimeException>(errorMessage);
640         }
641
642         // Update the total size in words of the category records
643         categoryRecordsSize += categoryRecord.size();
644
645         // Add the category record offset to the category records pointer table offset field
646         categoryRecordOffsets[categoryRecordOffsetIndex] = pointerTableOffset;
647         pointerTableOffset += numeric_cast<uint32_t>(categoryRecord.size() * uint32_t_size);
648
649         categoryIndex++;
650         categoryRecordOffsetIndex++;
651     }
652
653
654
655     // Calculate the length in words of the counter directory packet's data (excludes the packet header size)
656     size_t counterDirectoryPacketDataLength =
657             bodyHeaderSize +                 // The size of the body header
658             deviceRecordOffsets.size() +     // The size of the device records pointer table
659             counterSetRecordOffsets.size() + // The size of counter set pointer table
660             categoryRecordOffsets.size() +   // The size of category records pointer table
661             deviceRecordsSize +              // The total size of the device records
662             counterSetRecordsSize +          // The total size of the counter set records
663             categoryRecordsSize;             // The total size of the category records
664
665     // Calculate the size in words of the counter directory packet (the data length plus the packet header size)
666     size_t counterDirectoryPacketSize = packetHeaderSize +                // The size of the packet header
667                                         counterDirectoryPacketDataLength; // The data length
668
669
670     // Allocate the necessary space for the counter directory packet
671     std::vector<uint32_t> counterDirectoryPacket(counterDirectoryPacketSize, 0);
672
673     // -------------
674     // Packet header
675     // -------------
676
677     // Packet header word 0:
678     // 26:31 [6]  packet_family: control Packet Family
679     // 16:25 [10] packet_id: packet identifier
680     // 8:15  [8]  reserved: all zeros
681     // 0:7   [8]  reserved: all zeros
682     uint32_t packetFamily = 0;
683     uint32_t packetId = 2;
684     uint32_t packetHeaderWord0 = ((packetFamily & 0x3F) << 26) | ((packetId & 0x3FF) << 16);
685
686     // Packet header word 1:
687     // 0:31 [32] data_length: length of data, in bytes
688     uint32_t packetHeaderWord1 = numeric_cast<uint32_t>(counterDirectoryPacketDataLength * uint32_t_size);
689
690     // Create the packet header
691     uint32_t packetHeader[2]
692     {
693         packetHeaderWord0, // packet_family + packet_id + reserved + reserved
694         packetHeaderWord1  // data_length
695     };
696
697     // -----------
698     // Body header
699     // -----------
700
701     // Body header word 0:
702     // 16:31 [16] device_records_count: number of entries in the device_records_pointer_table
703     // 0:15  [16] reserved: all zeros
704     uint32_t bodyHeaderWord0 = static_cast<uint32_t>(deviceCount) << 16;
705
706     // Body header word 1:
707     // 0:31 [32] device_records_pointer_table_offset: offset to the device_records_pointer_table
708     uint32_t bodyHeaderWord1 = 0; // The offset is always zero here, as the device record pointer table field is always
709                                   // the first item in the pool
710
711     // Body header word 2:
712     // 16:31 [16] counter_set_count: number of entries in the counter_set_pointer_table
713     // 0:15  [16] reserved: all zeros
714     uint32_t bodyHeaderWord2 = static_cast<uint32_t>(counterSetCount) << 16;
715
716     // Body header word 3:
717     // 0:31 [32] counter_set_pointer_table_offset: offset to the counter_set_pointer_table
718     uint32_t bodyHeaderWord3 =
719             numeric_cast<uint32_t>(deviceRecordOffsets.size() * uint32_t_size); // The size of the device records
720                                                                                 // pointer table
721
722
723     // Body header word 4:
724     // 16:31 [16] categories_count: number of entries in the categories_pointer_table
725     // 0:15  [16] reserved: all zeros
726     uint32_t bodyHeaderWord4 = static_cast<uint32_t>(categoryCount) << 16;
727
728     // Body header word 5:
729     // 0:31 [32] categories_pointer_table_offset: offset to the categories_pointer_table
730     uint32_t bodyHeaderWord5 =
731             numeric_cast<uint32_t>(deviceRecordOffsets.size() * uint32_t_size +     // The size of the device records
732                                    counterSetRecordOffsets.size() * uint32_t_size); // pointer table, plus the size of
733                                                                                     // the counter set pointer table
734
735     // Create the body header
736     uint32_t bodyHeader[6]
737     {
738         bodyHeaderWord0, // device_records_count + reserved
739         bodyHeaderWord1, // device_records_pointer_table_offset
740         bodyHeaderWord2, // counter_set_count + reserved
741         bodyHeaderWord3, // counter_set_pointer_table_offset
742         bodyHeaderWord4, // categories_count + reserved
743         bodyHeaderWord5  // categories_pointer_table_offset
744     };
745
746     ARMNN_NO_CONVERSION_WARN_BEGIN
747     // Create the counter directory packet
748     auto counterDirectoryPacketOffset = counterDirectoryPacket.begin();
749     // packet_header
750     std::copy(packetHeader, packetHeader + packetHeaderSize, counterDirectoryPacketOffset);
751     counterDirectoryPacketOffset += packetHeaderSize;
752     // body_header
753     std::copy(bodyHeader, bodyHeader + bodyHeaderSize, counterDirectoryPacketOffset);
754     counterDirectoryPacketOffset += bodyHeaderSize;
755     // device_records_pointer_table
756     std::copy(deviceRecordOffsets.begin(), deviceRecordOffsets.end(), counterDirectoryPacketOffset);
757     counterDirectoryPacketOffset += deviceRecordOffsets.size();
758     // counter_set_pointer_table
759     std::copy(counterSetRecordOffsets.begin(), counterSetRecordOffsets.end(), counterDirectoryPacketOffset);
760     counterDirectoryPacketOffset += counterSetRecordOffsets.size();
761     // category_pointer_table
762     std::copy(categoryRecordOffsets.begin(), categoryRecordOffsets.end(), counterDirectoryPacketOffset);
763     counterDirectoryPacketOffset += categoryRecordOffsets.size();
764     // device_records
765     for (const DeviceRecord& deviceRecord : deviceRecords)
766     {
767         std::copy(deviceRecord.begin(), deviceRecord.end(), counterDirectoryPacketOffset); // device_record
768         counterDirectoryPacketOffset += deviceRecord.size();
769     }
770     // counter_set_records
771     for (const CounterSetRecord& counterSetRecord : counterSetRecords)
772     {
773         std::copy(counterSetRecord.begin(), counterSetRecord.end(), counterDirectoryPacketOffset); // counter_set_record
774         counterDirectoryPacketOffset += counterSetRecord.size();
775     }
776     // category_records
777     for (const CategoryRecord& categoryRecord : categoryRecords)
778     {
779         std::copy(categoryRecord.begin(), categoryRecord.end(), counterDirectoryPacketOffset); // category_record
780         counterDirectoryPacketOffset += categoryRecord.size();
781     }
782     ARMNN_NO_CONVERSION_WARN_END
783
784     // Calculate the total size in bytes of the counter directory packet
785     uint32_t totalSize = numeric_cast<uint32_t>(counterDirectoryPacketSize * uint32_t_size);
786
787     // Reserve space in the buffer for the packet
788     uint32_t reserved = 0;
789     IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
790
791     if (writeBuffer == nullptr || reserved < totalSize)
792     {
793         CancelOperationAndThrow<BufferExhaustion>(
794             writeBuffer,
795             boost::str(boost::format("No space left in buffer. Unable to reserve (%1%) bytes.") % totalSize));
796     }
797
798     // Offset for writing to the buffer
799     uint32_t offset = 0;
800
801     // Write the counter directory packet to the buffer
802     for (uint32_t counterDirectoryPacketWord : counterDirectoryPacket)
803     {
804         WriteUint32(writeBuffer, offset, counterDirectoryPacketWord);
805         offset += numeric_cast<uint32_t>(uint32_t_size);
806     }
807
808     m_BufferManager.Commit(writeBuffer, totalSize);
809 }
810
811 void SendCounterPacket::SendPeriodicCounterCapturePacket(uint64_t timestamp, const IndexValuePairsVector& values)
812 {
813     uint32_t uint16_t_size = sizeof(uint16_t);
814     uint32_t uint32_t_size = sizeof(uint32_t);
815     uint32_t uint64_t_size = sizeof(uint64_t);
816
817     uint32_t packetFamily = 3;
818     uint32_t packetClass = 0;
819     uint32_t packetType = 0;
820     uint32_t headerSize = 2 * uint32_t_size;
821     uint32_t bodySize = uint64_t_size + numeric_cast<uint32_t>(values.size()) * (uint16_t_size + uint32_t_size);
822     uint32_t totalSize = headerSize + bodySize;
823     uint32_t offset = 0;
824     uint32_t reserved = 0;
825
826     IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
827
828     if (writeBuffer == nullptr || reserved < totalSize)
829     {
830         CancelOperationAndThrow<BufferExhaustion>(
831             writeBuffer,
832             boost::str(boost::format("No space left in buffer. Unable to reserve (%1%) bytes.") % totalSize));
833     }
834
835     // Create header.
836     WriteUint32(writeBuffer,
837                 offset,
838                 ((packetFamily & 0x0000003F) << 26) |
839                 ((packetClass  & 0x0000007F) << 19) |
840                 ((packetType   & 0x00000007) << 16));
841     offset += uint32_t_size;
842     WriteUint32(writeBuffer, offset, bodySize);
843
844     // Copy captured Timestamp.
845     offset += uint32_t_size;
846     WriteUint64(writeBuffer, offset, timestamp);
847
848     // Copy selectedCounterIds.
849     offset += uint64_t_size;
850     for (const auto& pair: values)
851     {
852         WriteUint16(writeBuffer, offset, pair.first);
853         offset += uint16_t_size;
854         WriteUint32(writeBuffer, offset, pair.second);
855         offset += uint32_t_size;
856     }
857
858     m_BufferManager.Commit(writeBuffer, totalSize);
859 }
860
861 void SendCounterPacket::SendPeriodicCounterSelectionPacket(uint32_t capturePeriod,
862                                                            const std::vector<uint16_t>& selectedCounterIds)
863 {
864     uint32_t uint16_t_size = sizeof(uint16_t);
865     uint32_t uint32_t_size = sizeof(uint32_t);
866
867     uint32_t packetFamily = 0;
868     uint32_t packetId = 4;
869     uint32_t headerSize = 2 * uint32_t_size;
870     uint32_t bodySize = uint32_t_size + numeric_cast<uint32_t>(selectedCounterIds.size()) * uint16_t_size;
871     uint32_t totalSize = headerSize + bodySize;
872     uint32_t offset = 0;
873     uint32_t reserved = 0;
874
875     IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
876
877     if (writeBuffer == nullptr || reserved < totalSize)
878     {
879         CancelOperationAndThrow<BufferExhaustion>(
880             writeBuffer,
881             boost::str(boost::format("No space left in buffer. Unable to reserve (%1%) bytes.") % totalSize));
882     }
883
884     // Create header.
885     WriteUint32(writeBuffer, offset, ((packetFamily & 0x3F) << 26) | ((packetId & 0x3FF) << 16));
886     offset += uint32_t_size;
887     WriteUint32(writeBuffer, offset, bodySize);
888
889     // Copy capturePeriod.
890     offset += uint32_t_size;
891     WriteUint32(writeBuffer, offset, capturePeriod);
892
893     // Copy selectedCounterIds.
894     offset += uint32_t_size;
895     for(const uint16_t& id: selectedCounterIds)
896     {
897         WriteUint16(writeBuffer, offset, id);
898         offset += uint16_t_size;
899     }
900
901     m_BufferManager.Commit(writeBuffer, totalSize);
902 }
903
904 void SendCounterPacket::SetReadyToRead()
905 {
906     // We need to wait for the send thread to release its mutex
907     {
908         std::lock_guard<std::mutex> lck(m_WaitMutex);
909         m_ReadyToRead = true;
910     }
911     // Signal the send thread that there's something to read in the buffer
912     m_WaitCondition.notify_one();
913 }
914
915 void SendCounterPacket::Start(IProfilingConnection& profilingConnection)
916 {
917     // Check if the send thread is already running
918     if (m_IsRunning.load())
919     {
920         // The send thread is already running
921         return;
922     }
923
924     if (m_SendThread.joinable())
925     {
926         m_SendThread.join();
927     }
928
929     // Mark the send thread as running
930     m_IsRunning.store(true);
931
932     // Keep the send procedure going until the send thread is signalled to stop
933     m_KeepRunning.store(true);
934
935     // Make sure the send thread will not flush the buffer until signaled to do so
936     // no need for a mutex as the send thread can not be running at this point
937     m_ReadyToRead = false;
938
939     m_PacketSent = false;
940
941     // Start the send thread
942     m_SendThread = std::thread(&SendCounterPacket::Send, this, std::ref(profilingConnection));
943 }
944
945 void SendCounterPacket::Stop(bool rethrowSendThreadExceptions)
946 {
947     // Signal the send thread to stop
948     m_KeepRunning.store(false);
949
950     // Check that the send thread is running
951     if (m_SendThread.joinable())
952     {
953         // Kick the send thread out of the wait condition
954         SetReadyToRead();
955         // Wait for the send thread to complete operations
956         m_SendThread.join();
957     }
958
959     // Check if the send thread exception has to be rethrown
960     if (!rethrowSendThreadExceptions)
961     {
962         // No need to rethrow the send thread exception, return immediately
963         return;
964     }
965
966     // Check if there's an exception to rethrow
967     if (m_SendThreadException)
968     {
969         // Rethrow the send thread exception
970         std::rethrow_exception(m_SendThreadException);
971
972         // Nullify the exception as it has been rethrown
973         m_SendThreadException = nullptr;
974     }
975 }
976
977 void SendCounterPacket::Send(IProfilingConnection& profilingConnection)
978 {
979     // Run once and keep the sending procedure looping until the thread is signalled to stop
980     do
981     {
982         // Check the current state of the profiling service
983         ProfilingState currentState = m_StateMachine.GetCurrentState();
984         switch (currentState)
985         {
986         case ProfilingState::Uninitialised:
987         case ProfilingState::NotConnected:
988
989             // The send thread cannot be running when the profiling service is uninitialized or not connected,
990             // stop the thread immediately
991             m_KeepRunning.store(false);
992             m_IsRunning.store(false);
993
994             // An exception should be thrown here, save it to be rethrown later from the main thread so that
995             // it can be caught by the consumer
996             m_SendThreadException =
997                     std::make_exception_ptr(RuntimeException("The send thread should not be running with the "
998                                                              "profiling service not yet initialized or connected"));
999
1000             return;
1001         case ProfilingState::WaitingForAck:
1002
1003             // Send out a StreamMetadata packet and wait for the profiling connection to be acknowledged.
1004             // When a ConnectionAcknowledged packet is received, the profiling service state will be automatically
1005             // updated by the command handler
1006
1007             // Prepare a StreamMetadata packet and write it to the Counter Stream buffer
1008             SendStreamMetaDataPacket();
1009
1010              // Flush the buffer manually to send the packet
1011             FlushBuffer(profilingConnection);
1012
1013             // Wait for a connection ack from the remote server. We should expect a response within timeout value.
1014             // If not, drop back to the start of the loop and detect somebody closing the thread. Then send the
1015             // StreamMetadata again.
1016
1017             // Wait condition lock scope - Begin
1018             {
1019                 std::unique_lock<std::mutex> lock(m_WaitMutex);
1020
1021                 bool timeout = m_WaitCondition.wait_for(lock,
1022                                                         std::chrono::milliseconds(m_Timeout),
1023                                                         [&]{ return m_ReadyToRead; });
1024                 // If we get notified we need to flush the buffer again
1025                 if(timeout)
1026                 {
1027                     // Otherwise if we just timed out don't flush the buffer
1028                     continue;
1029                 }
1030                 //reset condition variable predicate for next use
1031                 m_ReadyToRead = false;
1032             }
1033             // Wait condition lock scope - End
1034             break;
1035         case ProfilingState::Active:
1036         default:
1037             // Wait condition lock scope - Begin
1038             {
1039                 std::unique_lock<std::mutex> lock(m_WaitMutex);
1040
1041                 // Normal working state for the send thread
1042                 // Check if the send thread is required to enforce a timeout wait policy
1043                 if (m_Timeout < 0)
1044                 {
1045                     // Wait indefinitely until notified that something to read has become available in the buffer
1046                     m_WaitCondition.wait(lock, [&] { return m_ReadyToRead; });
1047                 }
1048                 else
1049                 {
1050                     // Wait until the thread is notified of something to read from the buffer,
1051                     // or check anyway after the specified number of milliseconds
1052                     m_WaitCondition.wait_for(lock, std::chrono::milliseconds(m_Timeout), [&] { return m_ReadyToRead; });
1053                 }
1054
1055                 //reset condition variable predicate for next use
1056                 m_ReadyToRead = false;
1057             }
1058             // Wait condition lock scope - End
1059             break;
1060         }
1061
1062         // Send all the available packets in the buffer
1063         FlushBuffer(profilingConnection);
1064     } while (m_KeepRunning.load());
1065
1066     // Ensure that all readable data got written to the profiling connection before the thread is stopped
1067     // (do not notify any watcher in this case, as this is just to wrap up things before shutting down the send thread)
1068     FlushBuffer(profilingConnection, false);
1069
1070     // Mark the send thread as not running
1071     m_IsRunning.store(false);
1072 }
1073
1074 void SendCounterPacket::FlushBuffer(IProfilingConnection& profilingConnection, bool notifyWatchers)
1075 {
1076     // Get the first available readable buffer
1077     IPacketBufferPtr packetBuffer = m_BufferManager.GetReadableBuffer();
1078
1079     // Initialize the flag that indicates whether at least a packet has been sent
1080     bool packetsSent = false;
1081
1082     while (packetBuffer != nullptr)
1083     {
1084         // Get the data to send from the buffer
1085         const unsigned char* readBuffer = packetBuffer->GetReadableData();
1086         unsigned int readBufferSize = packetBuffer->GetSize();
1087
1088         if (readBuffer == nullptr || readBufferSize == 0)
1089         {
1090             // Nothing to send, get the next available readable buffer and continue
1091             m_BufferManager.MarkRead(packetBuffer);
1092             packetBuffer = m_BufferManager.GetReadableBuffer();
1093
1094             continue;
1095         }
1096
1097         // Check that the profiling connection is open, silently drop the data and continue if it's closed
1098         if (profilingConnection.IsOpen())
1099         {
1100             // Write a packet to the profiling connection. Silently ignore any write error and continue
1101             profilingConnection.WritePacket(readBuffer, boost::numeric_cast<uint32_t>(readBufferSize));
1102
1103             // Set the flag that indicates whether at least a packet has been sent
1104             packetsSent = true;
1105         }
1106
1107         // Mark the packet buffer as read
1108         m_BufferManager.MarkRead(packetBuffer);
1109
1110         // Get the next available readable buffer
1111         packetBuffer = m_BufferManager.GetReadableBuffer();
1112     }
1113     // Check whether at least a packet has been sent
1114     if (packetsSent && notifyWatchers)
1115     {
1116         // Wait for the parent thread to release its mutex if necessary
1117         {
1118             std::lock_guard<std::mutex> lck(m_PacketSentWaitMutex);
1119             m_PacketSent = true;
1120         }
1121         // Notify to any watcher that something has been sent
1122         m_PacketSentWaitCondition.notify_one();
1123     }
1124 }
1125
1126 bool SendCounterPacket::WaitForPacketSent(uint32_t timeout = 1000)
1127 {
1128     std::unique_lock<std::mutex> lock(m_PacketSentWaitMutex);
1129     // Blocks until notified that at least a packet has been sent or until timeout expires.
1130     bool timedOut = m_PacketSentWaitCondition.wait_for(lock,
1131                                                        std::chrono::milliseconds(timeout),
1132                                                        [&] { return m_PacketSent; });
1133
1134     m_PacketSent = false;
1135
1136     return timedOut;
1137 }
1138
1139 } // namespace profiling
1140
1141 } // namespace armnn