Add thin abstraction layer for processes and filesystem
[platform/upstream/armnn.git] / src / profiling / SendCounterPacket.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "SendCounterPacket.hpp"
7 #include "EncodeVersion.hpp"
8 #include "ProfilingUtils.hpp"
9
10 #include <armnn/Exceptions.hpp>
11 #include <armnn/Conversion.hpp>
12 #include <Processes.hpp>
13
14 #include <boost/format.hpp>
15 #include <boost/numeric/conversion/cast.hpp>
16 #include <boost/core/ignore_unused.hpp>
17
18 #include <cstring>
19
20 namespace armnn
21 {
22
23 namespace profiling
24 {
25
// Make boost::numeric_cast usable unqualified; it is used throughout this file for the
// size_t -> uint32_t conversions of sizes and offsets
using boost::numeric_cast;

// Namespace-scope definition of the static data member PIPE_MAGIC. No initializer is given here, so the
// value must come from the in-class declaration (presumably in SendCounterPacket.hpp - confirm)
const unsigned int SendCounterPacket::PIPE_MAGIC;
29
30 void SendCounterPacket::SendStreamMetaDataPacket()
31 {
32     std::string info(GetSoftwareInfo());
33     std::string hardwareVersion(GetHardwareVersion());
34     std::string softwareVersion(GetSoftwareVersion());
35     std::string processName = GetProcessName().substr(0, 60);
36
37     uint32_t infoSize = numeric_cast<uint32_t>(info.size()) > 0 ? numeric_cast<uint32_t>(info.size()) + 1 : 0;
38     uint32_t hardwareVersionSize = numeric_cast<uint32_t>(hardwareVersion.size()) > 0 ?
39                                    numeric_cast<uint32_t>(hardwareVersion.size()) + 1 : 0;
40     uint32_t softwareVersionSize = numeric_cast<uint32_t>(softwareVersion.size()) > 0 ?
41                                    numeric_cast<uint32_t>(softwareVersion.size()) + 1 : 0;
42     uint32_t processNameSize = numeric_cast<uint32_t>(processName.size()) > 0 ?
43                                numeric_cast<uint32_t>(processName.size()) + 1 : 0;
44
45     uint32_t sizeUint32 = numeric_cast<uint32_t>(sizeof(uint32_t));
46
47     uint32_t headerSize = 2 * sizeUint32;
48     uint32_t bodySize = 10 * sizeUint32;
49     uint32_t packetVersionCountSize = sizeUint32;
50
51     // Supported Packets
52     // Stream metadata packet            (packet family=0; packet id=0)
53     // Connection Acknowledged packet    (packet family=0, packet id=1)
54     // Counter Directory packet          (packet family=0; packet id=2)
55     // Request Counter Directory packet  (packet family=0, packet id=3)
56     // Periodic Counter Selection packet (packet family=0, packet id=4)
57     // Periodic Counter Capture packet   (packet family=1, packet class=0, type=0)
58     uint32_t packetVersionEntries = 6;
59
60     uint32_t payloadSize = numeric_cast<uint32_t>(infoSize + hardwareVersionSize + softwareVersionSize +
61                                                   processNameSize + packetVersionCountSize +
62                                                   (packetVersionEntries * 2 * sizeUint32));
63
64     uint32_t totalSize = headerSize + bodySize + payloadSize;
65     uint32_t offset = 0;
66     uint32_t reserved = 0;
67
68     IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
69
70     if (writeBuffer == nullptr || reserved < totalSize)
71     {
72         CancelOperationAndThrow<BufferExhaustion>(
73             writeBuffer,
74             boost::str(boost::format("No space left in buffer. Unable to reserve (%1%) bytes.") % totalSize));
75     }
76
77     try
78     {
79         // Create header
80
81         WriteUint32(writeBuffer, offset, 0);
82         offset += sizeUint32;
83         WriteUint32(writeBuffer, offset, totalSize - headerSize);
84
85         // Packet body
86
87         offset += sizeUint32;
88         WriteUint32(writeBuffer, offset, PIPE_MAGIC); // pipe_magic
89         offset += sizeUint32;
90         WriteUint32(writeBuffer, offset, EncodeVersion(1, 0, 0)); // stream_metadata_version
91         offset += sizeUint32;
92         WriteUint32(writeBuffer, offset, MAX_METADATA_PACKET_LENGTH); // max_data_length
93         offset += sizeUint32;
94         int pid = armnnUtils::Processes::GetCurrentId();
95         WriteUint32(writeBuffer, offset, numeric_cast<uint32_t>(pid)); // pid
96         offset += sizeUint32;
97         uint32_t poolOffset = bodySize;
98         WriteUint32(writeBuffer, offset, infoSize ? poolOffset : 0); // offset_info
99         offset += sizeUint32;
100         poolOffset += infoSize;
101         WriteUint32(writeBuffer, offset, hardwareVersionSize ? poolOffset : 0); // offset_hw_version
102         offset += sizeUint32;
103         poolOffset += hardwareVersionSize;
104         WriteUint32(writeBuffer, offset, softwareVersionSize ? poolOffset : 0); // offset_sw_version
105         offset += sizeUint32;
106         poolOffset += softwareVersionSize;
107         WriteUint32(writeBuffer, offset, processNameSize ? poolOffset : 0); // offset_process_name
108         offset += sizeUint32;
109         poolOffset += processNameSize;
110         WriteUint32(writeBuffer, offset, packetVersionEntries ? poolOffset : 0); // offset_packet_version_table
111         offset += sizeUint32;
112         WriteUint32(writeBuffer, offset, 0); // reserved
113         offset += sizeUint32;
114
115         // Pool
116
117         if (infoSize)
118         {
119             memcpy(&writeBuffer->GetWritableData()[offset], info.c_str(), infoSize);
120             offset += infoSize;
121         }
122
123         if (hardwareVersionSize)
124         {
125             memcpy(&writeBuffer->GetWritableData()[offset], hardwareVersion.c_str(), hardwareVersionSize);
126             offset += hardwareVersionSize;
127         }
128
129         if (softwareVersionSize)
130         {
131             memcpy(&writeBuffer->GetWritableData()[offset], softwareVersion.c_str(), softwareVersionSize);
132             offset += softwareVersionSize;
133         }
134
135         if (processNameSize)
136         {
137             memcpy(&writeBuffer->GetWritableData()[offset], processName.c_str(), processNameSize);
138             offset += processNameSize;
139         }
140
141         if (packetVersionEntries)
142         {
143             // Packet Version Count
144             WriteUint32(writeBuffer, offset, packetVersionEntries << 16);
145
146             // Packet Version Entries
147             uint32_t packetFamily = 0;
148             uint32_t packetId = 0;
149
150             offset += sizeUint32;
151             for (uint32_t i = 0; i < packetVersionEntries - 1; ++i)
152             {
153                 WriteUint32(writeBuffer, offset, ((packetFamily & 0x3F) << 26) | ((packetId++ & 0x3FF) << 16));
154                 offset += sizeUint32;
155                 WriteUint32(writeBuffer, offset, EncodeVersion(1, 0, 0));
156                 offset += sizeUint32;
157             }
158
159             packetFamily = 1;
160             packetId = 0;
161
162             WriteUint32(writeBuffer, offset, ((packetFamily & 0x3F) << 26) | ((packetId & 0x3FF) << 16));
163             offset += sizeUint32;
164             WriteUint32(writeBuffer, offset, EncodeVersion(1, 0, 0));
165         }
166     }
167     catch(...)
168     {
169         CancelOperationAndThrow<RuntimeException>(writeBuffer, "Error processing packet.");
170     }
171
172     m_BufferManager.Commit(writeBuffer, totalSize);
173 }
174
175 bool SendCounterPacket::CreateCategoryRecord(const CategoryPtr& category,
176                                              const Counters& counters,
177                                              CategoryRecord& categoryRecord,
178                                              std::string& errorMessage)
179 {
180     using namespace boost::numeric;
181
182     BOOST_ASSERT(category);
183
184     const std::string& categoryName = category->m_Name;
185     const std::vector<uint16_t> categoryCounters = category->m_Counters;
186     uint16_t deviceUid = category->m_DeviceUid;
187     uint16_t counterSetUid = category->m_CounterSetUid;
188
189     BOOST_ASSERT(!categoryName.empty());
190
191     // Utils
192     size_t uint32_t_size = sizeof(uint32_t);
193
194     // Category record word 0:
195     // 16:31 [16] device: the uid of a device element which identifies some hardware device that
196     //                    the category belongs to
197     // 0:15  [16] counter_set: the uid of a counter_set the category is associated with
198     uint32_t categoryRecordWord0 = (static_cast<uint32_t>(deviceUid) << 16) |
199                                    (static_cast<uint32_t>(counterSetUid));
200
201     // Category record word 1:
202     // 16:31 [16] event_count: number of events belonging to this category
203     // 0:15  [16] reserved: all zeros
204     uint32_t categoryRecordWord1 = static_cast<uint32_t>(categoryCounters.size()) << 16;
205
206     // Category record word 2:
207     // 0:31 [32] event_pointer_table_offset: offset from the beginning of the category data pool to
208     //                                       the event_pointer_table
209     uint32_t categoryRecordWord2 = 0; // The offset is always zero here, as the event pointer table field is always
210                                       // the first item in the pool
211
212     // Convert the device name into a SWTrace namestring
213     std::vector<uint32_t> categoryNameBuffer;
214     if (!StringToSwTraceString<SwTraceNameCharPolicy>(categoryName, categoryNameBuffer))
215     {
216         errorMessage = boost::str(boost::format("Cannot convert the name of category \"%1%\" to an SWTrace namestring")
217                                   % categoryName);
218         return false;
219     }
220
221     // Process the event records
222     size_t counterCount = categoryCounters.size();
223     std::vector<EventRecord> eventRecords(counterCount);
224     std::vector<uint32_t> eventRecordOffsets(counterCount, 0);
225     size_t eventRecordsSize = 0;
226     uint32_t eventRecordsOffset =
227             numeric_cast<uint32_t>((eventRecords.size() + categoryNameBuffer.size()) * uint32_t_size);
228     for (size_t counterIndex = 0, eventRecordIndex = 0, eventRecordOffsetIndex = 0;
229          counterIndex < counterCount;
230          counterIndex++, eventRecordIndex++, eventRecordOffsetIndex++)
231     {
232         uint16_t counterUid = categoryCounters.at(counterIndex);
233         auto it = counters.find(counterUid);
234         BOOST_ASSERT(it != counters.end());
235         const CounterPtr& counter = it->second;
236
237         EventRecord& eventRecord = eventRecords.at(eventRecordIndex);
238         if (!CreateEventRecord(counter, eventRecord, errorMessage))
239         {
240             return false;
241         }
242
243         // Update the total size in words of the event records
244         eventRecordsSize += eventRecord.size();
245
246         // Add the event record offset to the event pointer table offset field
247         eventRecordOffsets[eventRecordOffsetIndex] = eventRecordsOffset;
248         eventRecordsOffset += numeric_cast<uint32_t>(eventRecord.size() * uint32_t_size);
249     }
250
251     // Category record word 3:
252     // 0:31 [32] name_offset (offset from the beginning of the category data pool to the name field)
253     uint32_t categoryRecordWord3 = numeric_cast<uint32_t>(eventRecordOffsets.size() * uint32_t_size);
254
255     // Calculate the size in words of the category record
256     size_t categoryRecordSize = 4u + // The size of the fixed part (device + counter_set + event_count + reserved +
257                                      // event_pointer_table_offset + name_offset)
258                                 eventRecordOffsets.size() + // The size of the variable part (the event pointer table +
259                                 categoryNameBuffer.size() + // and the category name including the null-terminator +
260                                 eventRecordsSize;           // the event records)
261
262     // Allocate the necessary space for the category record
263     categoryRecord.resize(categoryRecordSize);
264
265     ARMNN_NO_CONVERSION_WARN_BEGIN
266     // Create the category record
267     categoryRecord[0] = categoryRecordWord0; // device + counter_set
268     categoryRecord[1] = categoryRecordWord1; // event_count + reserved
269     categoryRecord[2] = categoryRecordWord2; // event_pointer_table_offset
270     categoryRecord[3] = categoryRecordWord3; // name_offset
271     auto offset = categoryRecord.begin() + 4u;
272     std::copy(eventRecordOffsets.begin(), eventRecordOffsets.end(), offset); // event_pointer_table
273     offset += eventRecordOffsets.size();
274     std::copy(categoryNameBuffer.begin(), categoryNameBuffer.end(), offset); // name
275     offset += categoryNameBuffer.size();
276     for (const EventRecord& eventRecord : eventRecords)
277     {
278         std::copy(eventRecord.begin(), eventRecord.end(), offset); // event_record
279         offset += eventRecord.size();
280     }
281     ARMNN_NO_CONVERSION_WARN_END
282
283     return true;
284 }
285
286 bool SendCounterPacket::CreateDeviceRecord(const DevicePtr& device,
287                                            DeviceRecord& deviceRecord,
288                                            std::string& errorMessage)
289 {
290     BOOST_ASSERT(device);
291
292     uint16_t deviceUid = device->m_Uid;
293     const std::string& deviceName = device->m_Name;
294     uint16_t deviceCores = device->m_Cores;
295
296     BOOST_ASSERT(!deviceName.empty());
297
298     // Device record word 0:
299     // 16:31 [16] uid: the unique identifier for the device
300     // 0:15  [16] cores: the number of individual streams of counters for one or more cores of some device
301     uint32_t deviceRecordWord0 = (static_cast<uint32_t>(deviceUid) << 16) |
302                                  (static_cast<uint32_t>(deviceCores));
303
304     // Device record word 1:
305     // 0:31 [32] name_offset: offset from the beginning of the device record pool to the name field
306     uint32_t deviceRecordWord1 = 0; // The offset is always zero here, as the name field is always
307                                     // the first (and only) item in the pool
308
309     // Convert the device name into a SWTrace string
310     std::vector<uint32_t> deviceNameBuffer;
311     if (!StringToSwTraceString<SwTraceCharPolicy>(deviceName, deviceNameBuffer))
312     {
313         errorMessage = boost::str(boost::format("Cannot convert the name of device %1% (\"%2%\") to an SWTrace string")
314                                   % deviceUid
315                                   % deviceName);
316         return false;
317     }
318
319     // Calculate the size in words of the device record
320     size_t deviceRecordSize = 2u + // The size of the fixed part (uid + cores + name_offset)
321                               deviceNameBuffer.size(); // The size of the variable part (the device name including
322                                                        // the null-terminator)
323
324     // Allocate the necessary space for the device record
325     deviceRecord.resize(deviceRecordSize);
326
327     // Create the device record
328     deviceRecord[0] = deviceRecordWord0; // uid + core
329     deviceRecord[1] = deviceRecordWord1; // name_offset
330     auto offset = deviceRecord.begin() + 2u;
331     std::copy(deviceNameBuffer.begin(), deviceNameBuffer.end(), offset); // name
332
333     return true;
334 }
335
336 bool SendCounterPacket::CreateCounterSetRecord(const CounterSetPtr& counterSet,
337                                                CounterSetRecord& counterSetRecord,
338                                                std::string& errorMessage)
339 {
340     BOOST_ASSERT(counterSet);
341
342     uint16_t counterSetUid = counterSet->m_Uid;
343     const std::string& counterSetName = counterSet->m_Name;
344     uint16_t counterSetCount = counterSet->m_Count;
345
346     BOOST_ASSERT(!counterSetName.empty());
347
348     // Counter set record word 0:
349     // 16:31 [16] uid: the unique identifier for the counter_set
350     // 0:15  [16] count: the number of counters which can be active in this set at any one time
351     uint32_t counterSetRecordWord0 = (static_cast<uint32_t>(counterSetUid) << 16) |
352                                      (static_cast<uint32_t>(counterSetCount));
353
354     // Counter set record word 1:
355     // 0:31 [32] name_offset: offset from the beginning of the counter set pool to the name field
356     uint32_t counterSetRecordWord1 = 0; // The offset is always zero here, as the name field is always
357                                         // the first (and only) item in the pool
358
359     // Convert the device name into a SWTrace namestring
360     std::vector<uint32_t> counterSetNameBuffer;
361     if (!StringToSwTraceString<SwTraceNameCharPolicy>(counterSet->m_Name, counterSetNameBuffer))
362     {
363         errorMessage = boost::str(boost::format("Cannot convert the name of counter set %1% (\"%2%\") to "
364                                                 "an SWTrace namestring")
365                                   % counterSetUid
366                                   % counterSetName);
367         return false;
368     }
369
370     // Calculate the size in words of the counter set record
371     size_t counterSetRecordSize = 2u + // The size of the fixed part (uid + cores + name_offset)
372                                   counterSetNameBuffer.size(); // The size of the variable part (the counter set name
373                                                                // including the null-terminator)
374
375     // Allocate the space for the counter set record
376     counterSetRecord.resize(counterSetRecordSize);
377
378     // Create the counter set record
379     counterSetRecord[0] = counterSetRecordWord0; // uid + core
380     counterSetRecord[1] = counterSetRecordWord1; // name_offset
381     auto offset = counterSetRecord.begin() + 2u;
382     std::copy(counterSetNameBuffer.begin(), counterSetNameBuffer.end(), offset); // name
383
384     return true;
385 }
386
387 bool SendCounterPacket::CreateEventRecord(const CounterPtr& counter,
388                                           EventRecord& eventRecord,
389                                           std::string& errorMessage)
390 {
391     using namespace boost::numeric;
392
393     BOOST_ASSERT(counter);
394
395     uint16_t           counterUid           = counter->m_Uid;
396     uint16_t           maxCounterUid        = counter->m_MaxCounterUid;
397     uint16_t           deviceUid            = counter->m_DeviceUid;
398     uint16_t           counterSetUid        = counter->m_CounterSetUid;
399     uint16_t           counterClass         = counter->m_Class;
400     uint16_t           counterInterpolation = counter->m_Interpolation;
401     double             counterMultiplier    = counter->m_Multiplier;
402     const std::string& counterName          = counter->m_Name;
403     const std::string& counterDescription   = counter->m_Description;
404     const std::string& counterUnits         = counter->m_Units;
405
406     BOOST_ASSERT(counterClass == 0 || counterClass == 1);
407     BOOST_ASSERT(counterInterpolation == 0 || counterInterpolation == 1);
408     BOOST_ASSERT(counterMultiplier);
409
410     // Utils
411     size_t uint32_t_size = sizeof(uint32_t);
412
413     // Event record word 0:
414     // 16:31 [16] max_counter_uid: if the device this event is associated with has more than one core and there
415     //                             is one of these counters per core this value will be set to
416     //                             (counter_uid + cores (from device_record)) - 1.
417     //                             If there is only a single core then this value will be the same as
418     //                             the counter_uid value
419     // 0:15  [16] count_uid: unique ID for the counter. Must be unique across all counters in all categories
420     uint32_t eventRecordWord0 = (static_cast<uint32_t>(maxCounterUid) << 16) |
421                                 (static_cast<uint32_t>(counterUid));
422
423     // Event record word 1:
424     // 16:31 [16] device: UID of the device this event is associated with. Set to zero if the event is NOT
425     //                    associated with a device
426     // 0:15  [16] counter_set: UID of the counter_set this event is associated with. Set to zero if the event
427     //                         is NOT associated with a counter_set
428     uint32_t eventRecordWord1 = (static_cast<uint32_t>(deviceUid) << 16) |
429                                 (static_cast<uint32_t>(counterSetUid));
430
431     // Event record word 2:
432     // 16:31 [16] class: type describing how to treat each data point in a stream of data points
433     // 0:15  [16] interpolation: type describing how to interpolate each data point in a stream of data points
434     uint32_t eventRecordWord2 = (static_cast<uint32_t>(counterClass) << 16) |
435                                 (static_cast<uint32_t>(counterInterpolation));
436
437     // Event record word 3-4:
438     // 0:63 [64] multiplier: internal data stream is represented as integer values, this allows scaling of
439     //                       those values as if they are fixed point numbers. Zero is not a valid value
440     uint32_t multiplier[2] = { 0u, 0u };
441     BOOST_ASSERT(sizeof(counterMultiplier) == sizeof(multiplier));
442     std::memcpy(multiplier, &counterMultiplier, sizeof(multiplier));
443     uint32_t eventRecordWord3 = multiplier[0];
444     uint32_t eventRecordWord4 = multiplier[1];
445
446     // Event record word 5:
447     // 0:31 [32] name_offset: offset from the beginning of the event record pool to the name field
448     uint32_t eventRecordWord5 = 0; // The offset is always zero here, as the name field is always
449                                    // the first item in the pool
450
451     // Convert the counter name into a SWTrace string
452     std::vector<uint32_t> counterNameBuffer;
453     if (!StringToSwTraceString<SwTraceCharPolicy>(counterName, counterNameBuffer))
454     {
455         errorMessage = boost::str(boost::format("Cannot convert the name of counter %1% (name: \"%2%\") "
456                                                 "to an SWTrace string")
457                                   % counterUid
458                                   % counterName);
459         return false;
460     }
461
462     // Event record word 6:
463     // 0:31 [32] description_offset: offset from the beginning of the event record pool to the description field
464     // The size of the name buffer in bytes
465     uint32_t eventRecordWord6 = numeric_cast<uint32_t>(counterNameBuffer.size() * uint32_t_size);
466
467     // Convert the counter description into a SWTrace string
468     std::vector<uint32_t> counterDescriptionBuffer;
469     if (!StringToSwTraceString<SwTraceCharPolicy>(counterDescription, counterDescriptionBuffer))
470     {
471         errorMessage = boost::str(boost::format("Cannot convert the description of counter %1% (description: \"%2%\") "
472                                                 "to an SWTrace string")
473                                   % counterUid
474                                   % counterName);
475         return false;
476     }
477
478     // Event record word 7:
479     // 0:31 [32] units_offset: (optional) offset from the beginning of the event record pool to the units field.
480     //                         An offset value of zero indicates this field is not provided
481     bool includeUnits = !counterUnits.empty();
482     // The size of the description buffer in bytes
483     uint32_t eventRecordWord7 = includeUnits ?
484                                 eventRecordWord6 +
485                                 numeric_cast<uint32_t>(counterDescriptionBuffer.size() * uint32_t_size) :
486                                 0;
487
488     // Convert the counter units into a SWTrace namestring (optional)
489     std::vector<uint32_t> counterUnitsBuffer;
490     if (includeUnits)
491     {
492         // Convert the counter units into a SWTrace namestring
493         if (!StringToSwTraceString<SwTraceNameCharPolicy>(counterUnits, counterUnitsBuffer))
494         {
495             errorMessage = boost::str(boost::format("Cannot convert the units of counter %1% (units: \"%2%\") "
496                                                     "to an SWTrace string")
497                                       % counterUid
498                                       % counterName);
499             return false;
500         }
501     }
502
503     // Calculate the size in words of the event record
504     size_t eventRecordSize = 8u + // The size of the fixed part (counter_uid + max_counter_uid + device +
505                                   //                             counter_set + class + interpolation +
506                                   //                             multiplier + name_offset + description_offset +
507                                   //                             units_offset)
508                              counterNameBuffer.size() +        // The size of the variable part (the counter name,
509                              counterDescriptionBuffer.size() + // description and units including the null-terminator)
510                              counterUnitsBuffer.size();
511
512     // Allocate the space for the event record
513     eventRecord.resize(eventRecordSize);
514
515     ARMNN_NO_CONVERSION_WARN_BEGIN
516     // Create the event record
517     eventRecord[0] = eventRecordWord0; // max_counter_uid + counter_uid
518     eventRecord[1] = eventRecordWord1; // device + counter_set
519     eventRecord[2] = eventRecordWord2; // class + interpolation
520     eventRecord[3] = eventRecordWord3; // multiplier
521     eventRecord[4] = eventRecordWord4; // multiplier
522     eventRecord[5] = eventRecordWord5; // name_offset
523     eventRecord[6] = eventRecordWord6; // description_offset
524     eventRecord[7] = eventRecordWord7; // units_offset
525     auto offset = eventRecord.begin() + 8u;
526     std::copy(counterNameBuffer.begin(), counterNameBuffer.end(), offset); // name
527     offset += counterNameBuffer.size();
528     std::copy(counterDescriptionBuffer.begin(), counterDescriptionBuffer.end(), offset); // description
529     if (includeUnits)
530     {
531         offset += counterDescriptionBuffer.size();
532         std::copy(counterUnitsBuffer.begin(), counterUnitsBuffer.end(), offset); // units
533     }
534     ARMNN_NO_CONVERSION_WARN_END
535
536     return true;
537 }
538
539 void SendCounterPacket::SendCounterDirectoryPacket(const ICounterDirectory& counterDirectory)
540 {
541     using namespace boost::numeric;
542
543     // Get the amount of data that needs to be put into the packet
544     uint16_t categoryCount    = counterDirectory.GetCategoryCount();
545     uint16_t deviceCount      = counterDirectory.GetDeviceCount();
546     uint16_t counterSetCount  = counterDirectory.GetCounterSetCount();
547
548     // Utils
549     size_t uint32_t_size = sizeof(uint32_t);
550     size_t packetHeaderSize = 2u;
551     size_t bodyHeaderSize = 6u;
552
553     // Initialize the offset for the pointer tables
554     uint32_t pointerTableOffset = 0;
555
556     // --------------
557     // Device records
558     // --------------
559
560     // Process device records
561     std::vector<DeviceRecord> deviceRecords(deviceCount);
562     const Devices& devices = counterDirectory.GetDevices();
563     std::vector<uint32_t> deviceRecordOffsets(deviceCount, 0); // device_records_pointer_table
564     size_t deviceRecordsSize = 0;
565     size_t deviceIndex = 0;
566     size_t deviceRecordOffsetIndex = 0;
567     for (auto it = devices.begin(); it != devices.end(); it++)
568     {
569         const DevicePtr& device = it->second;
570         DeviceRecord& deviceRecord = deviceRecords.at(deviceIndex);
571
572         std::string errorMessage;
573         if (!CreateDeviceRecord(device, deviceRecord, errorMessage))
574         {
575             CancelOperationAndThrow<RuntimeException>(errorMessage);
576         }
577
578         // Update the total size in words of the device records
579         deviceRecordsSize += deviceRecord.size();
580
581         // Add the device record offset to the device records pointer table offset field
582         deviceRecordOffsets[deviceRecordOffsetIndex] = pointerTableOffset;
583         pointerTableOffset += numeric_cast<uint32_t>(deviceRecord.size() * uint32_t_size);
584
585         deviceIndex++;
586         deviceRecordOffsetIndex++;
587     }
588
589     // -------------------
590     // Counter set records
591     // -------------------
592
593     // Process counter set records
594     std::vector<CounterSetRecord> counterSetRecords(counterSetCount);
595     const CounterSets& counterSets = counterDirectory.GetCounterSets();
596     std::vector<uint32_t> counterSetRecordOffsets(counterSetCount, 0); // counter_set_records_pointer_table
597     size_t counterSetRecordsSize = 0;
598     size_t counterSetIndex = 0;
599     size_t counterSetRecordOffsetIndex = 0;
600     for (auto it = counterSets.begin(); it != counterSets.end(); it++)
601     {
602         const CounterSetPtr& counterSet = it->second;
603         CounterSetRecord& counterSetRecord = counterSetRecords.at(counterSetIndex);
604
605         std::string errorMessage;
606         if (!CreateCounterSetRecord(counterSet, counterSetRecord, errorMessage))
607         {
608             CancelOperationAndThrow<RuntimeException>(errorMessage);
609         }
610
611         // Update the total size in words of the counter set records
612         counterSetRecordsSize += counterSetRecord.size();
613
614         // Add the counter set record offset to the counter set records pointer table offset field
615         counterSetRecordOffsets[counterSetRecordOffsetIndex] = pointerTableOffset;
616         pointerTableOffset += numeric_cast<uint32_t>(counterSetRecord.size() * uint32_t_size);
617
618         counterSetIndex++;
619         counterSetRecordOffsetIndex++;
620     }
621
622     // ----------------
623     // Category records
624     // ----------------
625
626     // Process category records
627     std::vector<CategoryRecord> categoryRecords(categoryCount);
628     const Categories& categories = counterDirectory.GetCategories();
629     std::vector<uint32_t> categoryRecordOffsets(categoryCount, 0); // category_records_pointer_table
630     size_t categoryRecordsSize = 0;
631     size_t categoryIndex = 0;
632     size_t categoryRecordOffsetIndex = 0;
633     for (auto it = categories.begin(); it != categories.end(); it++)
634     {
635         const CategoryPtr& category = *it;
636         CategoryRecord& categoryRecord = categoryRecords.at(categoryIndex);
637
638         std::string errorMessage;
639         if (!CreateCategoryRecord(category, counterDirectory.GetCounters(), categoryRecord, errorMessage))
640         {
641             CancelOperationAndThrow<RuntimeException>(errorMessage);
642         }
643
644         // Update the total size in words of the category records
645         categoryRecordsSize += categoryRecord.size();
646
647         // Add the category record offset to the category records pointer table offset field
648         categoryRecordOffsets[categoryRecordOffsetIndex] = pointerTableOffset;
649         pointerTableOffset += numeric_cast<uint32_t>(categoryRecord.size() * uint32_t_size);
650
651         categoryIndex++;
652         categoryRecordOffsetIndex++;
653     }
654
655
656
657     // Calculate the length in words of the counter directory packet's data (excludes the packet header size)
658     size_t counterDirectoryPacketDataLength =
659             bodyHeaderSize +                 // The size of the body header
660             deviceRecordOffsets.size() +     // The size of the device records pointer table
661             counterSetRecordOffsets.size() + // The size of counter set pointer table
662             categoryRecordOffsets.size() +   // The size of category records pointer table
663             deviceRecordsSize +              // The total size of the device records
664             counterSetRecordsSize +          // The total size of the counter set records
665             categoryRecordsSize;             // The total size of the category records
666
667     // Calculate the size in words of the counter directory packet (the data length plus the packet header size)
668     size_t counterDirectoryPacketSize = packetHeaderSize +                // The size of the packet header
669                                         counterDirectoryPacketDataLength; // The data length
670
671
672     // Allocate the necessary space for the counter directory packet
673     std::vector<uint32_t> counterDirectoryPacket(counterDirectoryPacketSize, 0);
674
675     // -------------
676     // Packet header
677     // -------------
678
679     // Packet header word 0:
680     // 26:31 [6]  packet_family: control Packet Family
681     // 16:25 [10] packet_id: packet identifier
682     // 8:15  [8]  reserved: all zeros
683     // 0:7   [8]  reserved: all zeros
684     uint32_t packetFamily = 0;
685     uint32_t packetId = 2;
686     uint32_t packetHeaderWord0 = ((packetFamily & 0x3F) << 26) | ((packetId & 0x3FF) << 16);
687
688     // Packet header word 1:
689     // 0:31 [32] data_length: length of data, in bytes
690     uint32_t packetHeaderWord1 = numeric_cast<uint32_t>(counterDirectoryPacketDataLength * uint32_t_size);
691
692     // Create the packet header
693     uint32_t packetHeader[2]
694     {
695         packetHeaderWord0, // packet_family + packet_id + reserved + reserved
696         packetHeaderWord1  // data_length
697     };
698
699     // -----------
700     // Body header
701     // -----------
702
703     // Body header word 0:
704     // 16:31 [16] device_records_count: number of entries in the device_records_pointer_table
705     // 0:15  [16] reserved: all zeros
706     uint32_t bodyHeaderWord0 = static_cast<uint32_t>(deviceCount) << 16;
707
708     // Body header word 1:
709     // 0:31 [32] device_records_pointer_table_offset: offset to the device_records_pointer_table
710     uint32_t bodyHeaderWord1 = 0; // The offset is always zero here, as the device record pointer table field is always
711                                   // the first item in the pool
712
713     // Body header word 2:
714     // 16:31 [16] counter_set_count: number of entries in the counter_set_pointer_table
715     // 0:15  [16] reserved: all zeros
716     uint32_t bodyHeaderWord2 = static_cast<uint32_t>(counterSetCount) << 16;
717
718     // Body header word 3:
719     // 0:31 [32] counter_set_pointer_table_offset: offset to the counter_set_pointer_table
720     uint32_t bodyHeaderWord3 =
721             numeric_cast<uint32_t>(deviceRecordOffsets.size() * uint32_t_size); // The size of the device records
722                                                                                 // pointer table
723
724
725     // Body header word 4:
726     // 16:31 [16] categories_count: number of entries in the categories_pointer_table
727     // 0:15  [16] reserved: all zeros
728     uint32_t bodyHeaderWord4 = static_cast<uint32_t>(categoryCount) << 16;
729
730     // Body header word 5:
731     // 0:31 [32] categories_pointer_table_offset: offset to the categories_pointer_table
732     uint32_t bodyHeaderWord5 =
733             numeric_cast<uint32_t>(deviceRecordOffsets.size() * uint32_t_size +     // The size of the device records
734                                    counterSetRecordOffsets.size() * uint32_t_size); // pointer table, plus the size of
735                                                                                     // the counter set pointer table
736
737     // Create the body header
738     uint32_t bodyHeader[6]
739     {
740         bodyHeaderWord0, // device_records_count + reserved
741         bodyHeaderWord1, // device_records_pointer_table_offset
742         bodyHeaderWord2, // counter_set_count + reserved
743         bodyHeaderWord3, // counter_set_pointer_table_offset
744         bodyHeaderWord4, // categories_count + reserved
745         bodyHeaderWord5  // categories_pointer_table_offset
746     };
747
748     ARMNN_NO_CONVERSION_WARN_BEGIN
749     // Create the counter directory packet
750     auto counterDirectoryPacketOffset = counterDirectoryPacket.begin();
751     // packet_header
752     std::copy(packetHeader, packetHeader + packetHeaderSize, counterDirectoryPacketOffset);
753     counterDirectoryPacketOffset += packetHeaderSize;
754     // body_header
755     std::copy(bodyHeader, bodyHeader + bodyHeaderSize, counterDirectoryPacketOffset);
756     counterDirectoryPacketOffset += bodyHeaderSize;
757     // device_records_pointer_table
758     std::copy(deviceRecordOffsets.begin(), deviceRecordOffsets.end(), counterDirectoryPacketOffset);
759     counterDirectoryPacketOffset += deviceRecordOffsets.size();
760     // counter_set_pointer_table
761     std::copy(counterSetRecordOffsets.begin(), counterSetRecordOffsets.end(), counterDirectoryPacketOffset);
762     counterDirectoryPacketOffset += counterSetRecordOffsets.size();
763     // category_pointer_table
764     std::copy(categoryRecordOffsets.begin(), categoryRecordOffsets.end(), counterDirectoryPacketOffset);
765     counterDirectoryPacketOffset += categoryRecordOffsets.size();
766     // device_records
767     for (const DeviceRecord& deviceRecord : deviceRecords)
768     {
769         std::copy(deviceRecord.begin(), deviceRecord.end(), counterDirectoryPacketOffset); // device_record
770         counterDirectoryPacketOffset += deviceRecord.size();
771     }
772     // counter_set_records
773     for (const CounterSetRecord& counterSetRecord : counterSetRecords)
774     {
775         std::copy(counterSetRecord.begin(), counterSetRecord.end(), counterDirectoryPacketOffset); // counter_set_record
776         counterDirectoryPacketOffset += counterSetRecord.size();
777     }
778     // category_records
779     for (const CategoryRecord& categoryRecord : categoryRecords)
780     {
781         std::copy(categoryRecord.begin(), categoryRecord.end(), counterDirectoryPacketOffset); // category_record
782         counterDirectoryPacketOffset += categoryRecord.size();
783     }
784     ARMNN_NO_CONVERSION_WARN_END
785
786     // Calculate the total size in bytes of the counter directory packet
787     uint32_t totalSize = numeric_cast<uint32_t>(counterDirectoryPacketSize * uint32_t_size);
788
789     // Reserve space in the buffer for the packet
790     uint32_t reserved = 0;
791     IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
792
793     if (writeBuffer == nullptr || reserved < totalSize)
794     {
795         CancelOperationAndThrow<BufferExhaustion>(
796             writeBuffer,
797             boost::str(boost::format("No space left in buffer. Unable to reserve (%1%) bytes.") % totalSize));
798     }
799
800     // Offset for writing to the buffer
801     uint32_t offset = 0;
802
803     // Write the counter directory packet to the buffer
804     for (uint32_t counterDirectoryPacketWord : counterDirectoryPacket)
805     {
806         WriteUint32(writeBuffer, offset, counterDirectoryPacketWord);
807         offset += numeric_cast<uint32_t>(uint32_t_size);
808     }
809
810     m_BufferManager.Commit(writeBuffer, totalSize);
811 }
812
813 void SendCounterPacket::SendPeriodicCounterCapturePacket(uint64_t timestamp, const IndexValuePairsVector& values)
814 {
815     uint32_t uint16_t_size = sizeof(uint16_t);
816     uint32_t uint32_t_size = sizeof(uint32_t);
817     uint32_t uint64_t_size = sizeof(uint64_t);
818
819     uint32_t packetFamily = 3;
820     uint32_t packetClass = 0;
821     uint32_t packetType = 0;
822     uint32_t headerSize = 2 * uint32_t_size;
823     uint32_t bodySize = uint64_t_size + numeric_cast<uint32_t>(values.size()) * (uint16_t_size + uint32_t_size);
824     uint32_t totalSize = headerSize + bodySize;
825     uint32_t offset = 0;
826     uint32_t reserved = 0;
827
828     IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
829
830     if (writeBuffer == nullptr || reserved < totalSize)
831     {
832         CancelOperationAndThrow<BufferExhaustion>(
833             writeBuffer,
834             boost::str(boost::format("No space left in buffer. Unable to reserve (%1%) bytes.") % totalSize));
835     }
836
837     // Create header.
838     WriteUint32(writeBuffer,
839                 offset,
840                 ((packetFamily & 0x0000003F) << 26) |
841                 ((packetClass  & 0x0000007F) << 19) |
842                 ((packetType   & 0x00000007) << 16));
843     offset += uint32_t_size;
844     WriteUint32(writeBuffer, offset, bodySize);
845
846     // Copy captured Timestamp.
847     offset += uint32_t_size;
848     WriteUint64(writeBuffer, offset, timestamp);
849
850     // Copy selectedCounterIds.
851     offset += uint64_t_size;
852     for (const auto& pair: values)
853     {
854         WriteUint16(writeBuffer, offset, pair.first);
855         offset += uint16_t_size;
856         WriteUint32(writeBuffer, offset, pair.second);
857         offset += uint32_t_size;
858     }
859
860     m_BufferManager.Commit(writeBuffer, totalSize);
861 }
862
863 void SendCounterPacket::SendPeriodicCounterSelectionPacket(uint32_t capturePeriod,
864                                                            const std::vector<uint16_t>& selectedCounterIds)
865 {
866     uint32_t uint16_t_size = sizeof(uint16_t);
867     uint32_t uint32_t_size = sizeof(uint32_t);
868
869     uint32_t packetFamily = 0;
870     uint32_t packetId = 4;
871     uint32_t headerSize = 2 * uint32_t_size;
872     uint32_t bodySize = uint32_t_size + numeric_cast<uint32_t>(selectedCounterIds.size()) * uint16_t_size;
873     uint32_t totalSize = headerSize + bodySize;
874     uint32_t offset = 0;
875     uint32_t reserved = 0;
876
877     IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
878
879     if (writeBuffer == nullptr || reserved < totalSize)
880     {
881         CancelOperationAndThrow<BufferExhaustion>(
882             writeBuffer,
883             boost::str(boost::format("No space left in buffer. Unable to reserve (%1%) bytes.") % totalSize));
884     }
885
886     // Create header.
887     WriteUint32(writeBuffer, offset, ((packetFamily & 0x3F) << 26) | ((packetId & 0x3FF) << 16));
888     offset += uint32_t_size;
889     WriteUint32(writeBuffer, offset, bodySize);
890
891     // Copy capturePeriod.
892     offset += uint32_t_size;
893     WriteUint32(writeBuffer, offset, capturePeriod);
894
895     // Copy selectedCounterIds.
896     offset += uint32_t_size;
897     for(const uint16_t& id: selectedCounterIds)
898     {
899         WriteUint16(writeBuffer, offset, id);
900         offset += uint16_t_size;
901     }
902
903     m_BufferManager.Commit(writeBuffer, totalSize);
904 }
905
906 void SendCounterPacket::SetReadyToRead()
907 {
908     // We need to wait for the send thread to release its mutex
909     {
910         std::lock_guard<std::mutex> lck(m_WaitMutex);
911         m_ReadyToRead = true;
912     }
913     // Signal the send thread that there's something to read in the buffer
914     m_WaitCondition.notify_one();
915 }
916
917 void SendCounterPacket::Start(IProfilingConnection& profilingConnection)
918 {
919     // Check if the send thread is already running
920     if (m_IsRunning.load())
921     {
922         // The send thread is already running
923         return;
924     }
925
926     if (m_SendThread.joinable())
927     {
928         m_SendThread.join();
929     }
930
931     // Mark the send thread as running
932     m_IsRunning.store(true);
933
934     // Keep the send procedure going until the send thread is signalled to stop
935     m_KeepRunning.store(true);
936
937     // Make sure the send thread will not flush the buffer until signaled to do so
938     // no need for a mutex as the send thread can not be running at this point
939     m_ReadyToRead = false;
940
941     m_PacketSent = false;
942
943     // Start the send thread
944     m_SendThread = std::thread(&SendCounterPacket::Send, this, std::ref(profilingConnection));
945 }
946
947 void SendCounterPacket::Stop(bool rethrowSendThreadExceptions)
948 {
949     // Signal the send thread to stop
950     m_KeepRunning.store(false);
951
952     // Check that the send thread is running
953     if (m_SendThread.joinable())
954     {
955         // Kick the send thread out of the wait condition
956         SetReadyToRead();
957         // Wait for the send thread to complete operations
958         m_SendThread.join();
959     }
960
961     // Check if the send thread exception has to be rethrown
962     if (!rethrowSendThreadExceptions)
963     {
964         // No need to rethrow the send thread exception, return immediately
965         return;
966     }
967
968     // Check if there's an exception to rethrow
969     if (m_SendThreadException)
970     {
971         // Rethrow the send thread exception
972         std::rethrow_exception(m_SendThreadException);
973
974         // Nullify the exception as it has been rethrown
975         m_SendThreadException = nullptr;
976     }
977 }
978
979 void SendCounterPacket::Send(IProfilingConnection& profilingConnection)
980 {
981     // Run once and keep the sending procedure looping until the thread is signalled to stop
982     do
983     {
984         // Check the current state of the profiling service
985         ProfilingState currentState = m_StateMachine.GetCurrentState();
986         switch (currentState)
987         {
988         case ProfilingState::Uninitialised:
989         case ProfilingState::NotConnected:
990
991             // The send thread cannot be running when the profiling service is uninitialized or not connected,
992             // stop the thread immediately
993             m_KeepRunning.store(false);
994             m_IsRunning.store(false);
995
996             // An exception should be thrown here, save it to be rethrown later from the main thread so that
997             // it can be caught by the consumer
998             m_SendThreadException =
999                     std::make_exception_ptr(RuntimeException("The send thread should not be running with the "
1000                                                              "profiling service not yet initialized or connected"));
1001
1002             return;
1003         case ProfilingState::WaitingForAck:
1004
1005             // Send out a StreamMetadata packet and wait for the profiling connection to be acknowledged.
1006             // When a ConnectionAcknowledged packet is received, the profiling service state will be automatically
1007             // updated by the command handler
1008
1009             // Prepare a StreamMetadata packet and write it to the Counter Stream buffer
1010             SendStreamMetaDataPacket();
1011
1012              // Flush the buffer manually to send the packet
1013             FlushBuffer(profilingConnection);
1014
1015             // Wait for a connection ack from the remote server. We should expect a response within timeout value.
1016             // If not, drop back to the start of the loop and detect somebody closing the thread. Then send the
1017             // StreamMetadata again.
1018
1019             // Wait condition lock scope - Begin
1020             {
1021                 std::unique_lock<std::mutex> lock(m_WaitMutex);
1022
1023                 bool timeout = m_WaitCondition.wait_for(lock,
1024                                                         std::chrono::milliseconds(m_Timeout),
1025                                                         [&]{ return m_ReadyToRead; });
1026                 // If we get notified we need to flush the buffer again
1027                 if(timeout)
1028                 {
1029                     // Otherwise if we just timed out don't flush the buffer
1030                     continue;
1031                 }
1032                 //reset condition variable predicate for next use
1033                 m_ReadyToRead = false;
1034             }
1035             // Wait condition lock scope - End
1036             break;
1037         case ProfilingState::Active:
1038         default:
1039             // Wait condition lock scope - Begin
1040             {
1041                 std::unique_lock<std::mutex> lock(m_WaitMutex);
1042
1043                 // Normal working state for the send thread
1044                 // Check if the send thread is required to enforce a timeout wait policy
1045                 if (m_Timeout < 0)
1046                 {
1047                     // Wait indefinitely until notified that something to read has become available in the buffer
1048                     m_WaitCondition.wait(lock, [&] { return m_ReadyToRead; });
1049                 }
1050                 else
1051                 {
1052                     // Wait until the thread is notified of something to read from the buffer,
1053                     // or check anyway after the specified number of milliseconds
1054                     m_WaitCondition.wait_for(lock, std::chrono::milliseconds(m_Timeout), [&] { return m_ReadyToRead; });
1055                 }
1056
1057                 //reset condition variable predicate for next use
1058                 m_ReadyToRead = false;
1059             }
1060             // Wait condition lock scope - End
1061             break;
1062         }
1063
1064         // Send all the available packets in the buffer
1065         FlushBuffer(profilingConnection);
1066     } while (m_KeepRunning.load());
1067
1068     // Ensure that all readable data got written to the profiling connection before the thread is stopped
1069     // (do not notify any watcher in this case, as this is just to wrap up things before shutting down the send thread)
1070     FlushBuffer(profilingConnection, false);
1071
1072     // Mark the send thread as not running
1073     m_IsRunning.store(false);
1074 }
1075
1076 void SendCounterPacket::FlushBuffer(IProfilingConnection& profilingConnection, bool notifyWatchers)
1077 {
1078     // Get the first available readable buffer
1079     IPacketBufferPtr packetBuffer = m_BufferManager.GetReadableBuffer();
1080
1081     // Initialize the flag that indicates whether at least a packet has been sent
1082     bool packetsSent = false;
1083
1084     while (packetBuffer != nullptr)
1085     {
1086         // Get the data to send from the buffer
1087         const unsigned char* readBuffer = packetBuffer->GetReadableData();
1088         unsigned int readBufferSize = packetBuffer->GetSize();
1089
1090         if (readBuffer == nullptr || readBufferSize == 0)
1091         {
1092             // Nothing to send, get the next available readable buffer and continue
1093             m_BufferManager.MarkRead(packetBuffer);
1094             packetBuffer = m_BufferManager.GetReadableBuffer();
1095
1096             continue;
1097         }
1098
1099         // Check that the profiling connection is open, silently drop the data and continue if it's closed
1100         if (profilingConnection.IsOpen())
1101         {
1102             // Write a packet to the profiling connection. Silently ignore any write error and continue
1103             profilingConnection.WritePacket(readBuffer, boost::numeric_cast<uint32_t>(readBufferSize));
1104
1105             // Set the flag that indicates whether at least a packet has been sent
1106             packetsSent = true;
1107         }
1108
1109         // Mark the packet buffer as read
1110         m_BufferManager.MarkRead(packetBuffer);
1111
1112         // Get the next available readable buffer
1113         packetBuffer = m_BufferManager.GetReadableBuffer();
1114     }
1115     // Check whether at least a packet has been sent
1116     if (packetsSent && notifyWatchers)
1117     {
1118         // Wait for the parent thread to release its mutex if necessary
1119         {
1120             std::lock_guard<std::mutex> lck(m_PacketSentWaitMutex);
1121             m_PacketSent = true;
1122         }
1123         // Notify to any watcher that something has been sent
1124         m_PacketSentWaitCondition.notify_one();
1125     }
1126 }
1127
1128 bool SendCounterPacket::WaitForPacketSent(uint32_t timeout = 1000)
1129 {
1130     std::unique_lock<std::mutex> lock(m_PacketSentWaitMutex);
1131     // Blocks until notified that at least a packet has been sent or until timeout expires.
1132     bool timedOut = m_PacketSentWaitCondition.wait_for(lock,
1133                                                        std::chrono::milliseconds(timeout),
1134                                                        [&] { return m_PacketSent; });
1135
1136     m_PacketSent = false;
1137
1138     return timedOut;
1139 }
1140
1141 } // namespace profiling
1142
1143 } // namespace armnn