IVGCVSW-4835 Change CounterSet and Device name offsets sizes to bytes
[platform/upstream/armnn.git] / src / profiling / test / SendCounterPacketTests.cpp
index c3ad50c..e7e2312 100644 (file)
@@ -3,6 +3,8 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "ProfilingMocks.hpp"
+#include "ProfilingTestUtils.hpp"
 #include "SendCounterPacketTests.hpp"
 
 #include <BufferManager.hpp>
 #include <EncodeVersion.hpp>
 #include <ProfilingUtils.hpp>
 #include <SendCounterPacket.hpp>
+#include <Processes.hpp>
 
 #include <armnn/Exceptions.hpp>
 #include <armnn/Conversion.hpp>
+#include <armnn/Utils.hpp>
+
+#include <common/include/Constants.hpp>
+
 
 #include <boost/test/unit_test.hpp>
 #include <boost/numeric/conversion/cast.hpp>
@@ -25,7 +32,7 @@ namespace
 {
 
 // A short delay to wait for the thread to process a packet.
-uint16_t constexpr WAIT_UNTIL_READABLE_MS = 100;
+uint16_t constexpr WAIT_UNTIL_READABLE_MS = 20;
 
 void SetNotConnectedProfilingState(ProfilingStateMachine& profilingStateMachine)
 {
@@ -34,9 +41,12 @@ void SetNotConnectedProfilingState(ProfilingStateMachine& profilingStateMachine)
     {
     case ProfilingState::WaitingForAck:
         profilingStateMachine.TransitionToState(ProfilingState::Active);
+        ARMNN_FALLTHROUGH;
     case ProfilingState::Uninitialised:
+        ARMNN_FALLTHROUGH;
     case ProfilingState::Active:
         profilingStateMachine.TransitionToState(ProfilingState::NotConnected);
+        ARMNN_FALLTHROUGH;
     case ProfilingState::NotConnected:
         return;
     default:
@@ -50,10 +60,13 @@ void SetWaitingForAckProfilingState(ProfilingStateMachine& profilingStateMachine
     switch (currentState)
     {
     case ProfilingState::Uninitialised:
+        ARMNN_FALLTHROUGH;
     case ProfilingState::Active:
         profilingStateMachine.TransitionToState(ProfilingState::NotConnected);
+        ARMNN_FALLTHROUGH;
     case ProfilingState::NotConnected:
         profilingStateMachine.TransitionToState(ProfilingState::WaitingForAck);
+        ARMNN_FALLTHROUGH;
     case ProfilingState::WaitingForAck:
         return;
     default:
@@ -68,10 +81,13 @@ void SetActiveProfilingState(ProfilingStateMachine& profilingStateMachine)
     {
     case ProfilingState::Uninitialised:
         profilingStateMachine.TransitionToState(ProfilingState::NotConnected);
+        ARMNN_FALLTHROUGH;
     case ProfilingState::NotConnected:
         profilingStateMachine.TransitionToState(ProfilingState::WaitingForAck);
+        ARMNN_FALLTHROUGH;
     case ProfilingState::WaitingForAck:
         profilingStateMachine.TransitionToState(ProfilingState::Active);
+        ARMNN_FALLTHROUGH;
     case ProfilingState::Active:
         return;
     default:
@@ -83,6 +99,8 @@ void SetActiveProfilingState(ProfilingStateMachine& profilingStateMachine)
 
 BOOST_AUTO_TEST_SUITE(SendCounterPacketTests)
 
+using PacketType = MockProfilingConnection::PacketType;
+
 BOOST_AUTO_TEST_CASE(MockSendCounterPacketTest)
 {
     MockBufferManager mockBuffer(512);
@@ -108,7 +126,7 @@ BOOST_AUTO_TEST_CASE(MockSendCounterPacketTest)
     mockBuffer.MarkRead(packetBuffer);
 
     uint64_t timestamp = 0;
-    std::vector<std::pair<uint16_t, uint32_t>> indexValuePairs;
+    std::vector<CounterValue> indexValuePairs;
 
     mockSendCounterPacket.SendPeriodicCounterCapturePacket(timestamp, indexValuePairs);
 
@@ -133,11 +151,9 @@ BOOST_AUTO_TEST_CASE(MockSendCounterPacketTest)
 
 BOOST_AUTO_TEST_CASE(SendPeriodicCounterSelectionPacketTest)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     // Error no space left in buffer
     MockBufferManager mockBuffer1(10);
-    SendCounterPacket sendPacket1(profilingStateMachine, mockBuffer1);
+    SendCounterPacket sendPacket1(mockBuffer1);
 
     uint32_t capturePeriod = 1000;
     std::vector<uint16_t> selectedCounterIds;
@@ -146,7 +162,7 @@ BOOST_AUTO_TEST_CASE(SendPeriodicCounterSelectionPacketTest)
 
     // Packet without any counters
     MockBufferManager mockBuffer2(512);
-    SendCounterPacket sendPacket2(profilingStateMachine, mockBuffer2);
+    SendCounterPacket sendPacket2(mockBuffer2);
 
     sendPacket2.SendPeriodicCounterSelectionPacket(capturePeriod, selectedCounterIds);
     auto readBuffer2 = mockBuffer2.GetReadableBuffer();
@@ -162,7 +178,7 @@ BOOST_AUTO_TEST_CASE(SendPeriodicCounterSelectionPacketTest)
 
     // Full packet message
     MockBufferManager mockBuffer3(512);
-    SendCounterPacket sendPacket3(profilingStateMachine, mockBuffer3);
+    SendCounterPacket sendPacket3(mockBuffer3);
 
     selectedCounterIds.reserve(5);
     selectedCounterIds.emplace_back(100);
@@ -200,18 +216,18 @@ BOOST_AUTO_TEST_CASE(SendPeriodicCounterCapturePacketTest)
 
     // Error no space left in buffer
     MockBufferManager mockBuffer1(10);
-    SendCounterPacket sendPacket1(profilingStateMachine, mockBuffer1);
+    SendCounterPacket sendPacket1(mockBuffer1);
 
     auto captureTimestamp = std::chrono::steady_clock::now();
     uint64_t time =  static_cast<uint64_t >(captureTimestamp.time_since_epoch().count());
-    std::vector<std::pair<uint16_t, uint32_t>> indexValuePairs;
+    std::vector<CounterValue> indexValuePairs;
 
     BOOST_CHECK_THROW(sendPacket1.SendPeriodicCounterCapturePacket(time, indexValuePairs),
                       BufferExhaustion);
 
     // Packet without any counters
     MockBufferManager mockBuffer2(512);
-    SendCounterPacket sendPacket2(profilingStateMachine, mockBuffer2);
+    SendCounterPacket sendPacket2(mockBuffer2);
 
     sendPacket2.SendPeriodicCounterCapturePacket(time, indexValuePairs);
     auto readBuffer2 = mockBuffer2.GetReadableBuffer();
@@ -228,14 +244,14 @@ BOOST_AUTO_TEST_CASE(SendPeriodicCounterCapturePacketTest)
 
     // Full packet message
     MockBufferManager mockBuffer3(512);
-    SendCounterPacket sendPacket3(profilingStateMachine, mockBuffer3);
+    SendCounterPacket sendPacket3(mockBuffer3);
 
     indexValuePairs.reserve(5);
-    indexValuePairs.emplace_back(std::make_pair<uint16_t, uint32_t >(0, 100));
-    indexValuePairs.emplace_back(std::make_pair<uint16_t, uint32_t >(1, 200));
-    indexValuePairs.emplace_back(std::make_pair<uint16_t, uint32_t >(2, 300));
-    indexValuePairs.emplace_back(std::make_pair<uint16_t, uint32_t >(3, 400));
-    indexValuePairs.emplace_back(std::make_pair<uint16_t, uint32_t >(4, 500));
+    indexValuePairs.emplace_back(CounterValue{0, 100});
+    indexValuePairs.emplace_back(CounterValue{1, 200});
+    indexValuePairs.emplace_back(CounterValue{2, 300});
+    indexValuePairs.emplace_back(CounterValue{3, 400});
+    indexValuePairs.emplace_back(CounterValue{4, 500});
     sendPacket3.SendPeriodicCounterCapturePacket(time, indexValuePairs);
     auto readBuffer3 = mockBuffer3.GetReadableBuffer();
 
@@ -277,30 +293,24 @@ BOOST_AUTO_TEST_CASE(SendStreamMetaDataPacketTest)
 
     uint32_t sizeUint32 = numeric_cast<uint32_t>(sizeof(uint32_t));
 
-    ProfilingStateMachine profilingStateMachine;
-
     // Error no space left in buffer
     MockBufferManager mockBuffer1(10);
-    SendCounterPacket sendPacket1(profilingStateMachine, mockBuffer1);
+    SendCounterPacket sendPacket1(mockBuffer1);
     BOOST_CHECK_THROW(sendPacket1.SendStreamMetaDataPacket(), armnn::profiling::BufferExhaustion);
 
     // Full metadata packet
 
     std::string processName = GetProcessName().substr(0, 60);
 
-    uint32_t infoSize = numeric_cast<uint32_t>(GetSoftwareInfo().size()) > 0 ?
-                        numeric_cast<uint32_t>(GetSoftwareInfo().size()) + 1 : 0;
-    uint32_t hardwareVersionSize = numeric_cast<uint32_t>(GetHardwareVersion().size()) > 0 ?
-                                   numeric_cast<uint32_t>(GetHardwareVersion().size()) + 1 : 0;
-    uint32_t softwareVersionSize = numeric_cast<uint32_t>(GetSoftwareVersion().size()) > 0 ?
-                                   numeric_cast<uint32_t>(GetSoftwareVersion().size()) + 1 : 0;
-    uint32_t processNameSize = numeric_cast<uint32_t>(processName.size()) > 0 ?
-                               numeric_cast<uint32_t>(processName.size()) + 1 : 0;
+    uint32_t infoSize =            numeric_cast<uint32_t>(GetSoftwareInfo().size()) + 1;
+    uint32_t hardwareVersionSize = numeric_cast<uint32_t>(GetHardwareVersion().size()) + 1;
+    uint32_t softwareVersionSize = numeric_cast<uint32_t>(GetSoftwareVersion().size()) + 1;
+    uint32_t processNameSize =     numeric_cast<uint32_t>(processName.size()) + 1;
 
     uint32_t packetEntries = 6;
 
     MockBufferManager mockBuffer2(512);
-    SendCounterPacket sendPacket2(profilingStateMachine, mockBuffer2);
+    SendCounterPacket sendPacket2(mockBuffer2);
     sendPacket2.SendStreamMetaDataPacket();
     auto readBuffer2 = mockBuffer2.GetReadableBuffer();
 
@@ -317,28 +327,29 @@ BOOST_AUTO_TEST_CASE(SendStreamMetaDataPacketTest)
     BOOST_TEST(headerWord1 == totalLength - (2 * sizeUint32)); // data length
 
     uint32_t offset = sizeUint32 * 2;
-    BOOST_TEST(ReadUint32(readBuffer2, offset) == SendCounterPacket::PIPE_MAGIC); // pipe_magic
+    BOOST_TEST(ReadUint32(readBuffer2, offset) == armnnProfiling::PIPE_MAGIC); // pipe_magic
     offset += sizeUint32;
     BOOST_TEST(ReadUint32(readBuffer2, offset) == EncodeVersion(1, 0, 0)); // stream_metadata_version
     offset += sizeUint32;
     BOOST_TEST(ReadUint32(readBuffer2, offset) == MAX_METADATA_PACKET_LENGTH); // max_data_len
     offset += sizeUint32;
-    BOOST_TEST(ReadUint32(readBuffer2, offset) == numeric_cast<uint32_t>(getpid())); // pid
+    int pid = armnnUtils::Processes::GetCurrentId();
+    BOOST_TEST(ReadUint32(readBuffer2, offset) == numeric_cast<uint32_t>(pid));
     offset += sizeUint32;
     uint32_t poolOffset = 10 * sizeUint32;
-    BOOST_TEST(ReadUint32(readBuffer2, offset) == (infoSize ? poolOffset : 0)); // offset_info
+    BOOST_TEST(ReadUint32(readBuffer2, offset) == poolOffset); // offset_info
     offset += sizeUint32;
     poolOffset += infoSize;
-    BOOST_TEST(ReadUint32(readBuffer2, offset) == (hardwareVersionSize ? poolOffset : 0)); // offset_hw_version
+    BOOST_TEST(ReadUint32(readBuffer2, offset) == poolOffset); // offset_hw_version
     offset += sizeUint32;
     poolOffset += hardwareVersionSize;
-    BOOST_TEST(ReadUint32(readBuffer2, offset) == (softwareVersionSize ? poolOffset : 0)); // offset_sw_version
+    BOOST_TEST(ReadUint32(readBuffer2, offset) == poolOffset); // offset_sw_version
     offset += sizeUint32;
     poolOffset += softwareVersionSize;
-    BOOST_TEST(ReadUint32(readBuffer2, offset) == (processNameSize ? poolOffset : 0)); // offset_process_name
+    BOOST_TEST(ReadUint32(readBuffer2, offset) == poolOffset); // offset_process_name
     offset += sizeUint32;
     poolOffset += processNameSize;
-    BOOST_TEST(ReadUint32(readBuffer2, offset) == (packetEntries ? poolOffset : 0)); // offset_packet_version_table
+    BOOST_TEST(ReadUint32(readBuffer2, offset) == poolOffset); // offset_packet_version_table
     offset += sizeUint32;
     BOOST_TEST(ReadUint32(readBuffer2, offset) == 0); // reserved
 
@@ -394,10 +405,8 @@ BOOST_AUTO_TEST_CASE(SendStreamMetaDataPacketTest)
 
 BOOST_AUTO_TEST_CASE(CreateDeviceRecordTest)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a device for testing
     uint16_t deviceUid = 27;
@@ -421,17 +430,15 @@ BOOST_AUTO_TEST_CASE(CreateDeviceRecordTest)
     };
     BOOST_CHECK(deviceRecordWord0[0] == deviceUid); // uid
     BOOST_CHECK(deviceRecordWord0[1] == deviceCores); // cores
-    BOOST_CHECK(deviceRecord[1] == 0); // name_offset
+    BOOST_CHECK(deviceRecord[1] == 8); // name_offset
     BOOST_CHECK(deviceRecord[2] == deviceName.size() + 1); // The length of the SWTrace string (name)
     BOOST_CHECK(std::memcmp(deviceRecord.data() + 3, deviceName.data(), deviceName.size()) == 0); // name
 }
 
 BOOST_AUTO_TEST_CASE(CreateInvalidDeviceRecordTest)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a device for testing
     uint16_t deviceUid = 27;
@@ -451,10 +458,8 @@ BOOST_AUTO_TEST_CASE(CreateInvalidDeviceRecordTest)
 
 BOOST_AUTO_TEST_CASE(CreateCounterSetRecordTest)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a counter set for testing
     uint16_t counterSetUid = 27;
@@ -478,17 +483,15 @@ BOOST_AUTO_TEST_CASE(CreateCounterSetRecordTest)
     };
     BOOST_CHECK(counterSetRecordWord0[0] == counterSetUid); // uid
     BOOST_CHECK(counterSetRecordWord0[1] == counterSetCount); // cores
-    BOOST_CHECK(counterSetRecord[1] == 0); // name_offset
+    BOOST_CHECK(counterSetRecord[1] == 8); // name_offset
     BOOST_CHECK(counterSetRecord[2] == counterSetName.size() + 1); // The length of the SWTrace string (name)
     BOOST_CHECK(std::memcmp(counterSetRecord.data() + 3, counterSetName.data(), counterSetName.size()) == 0); // name
 }
 
 BOOST_AUTO_TEST_CASE(CreateInvalidCounterSetRecordTest)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a counter set for testing
     uint16_t counterSetUid = 27;
@@ -508,10 +511,8 @@ BOOST_AUTO_TEST_CASE(CreateInvalidCounterSetRecordTest)
 
 BOOST_AUTO_TEST_CASE(CreateEventRecordTest)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a counter for testing
     uint16_t counterUid = 7256;
@@ -535,7 +536,7 @@ BOOST_AUTO_TEST_CASE(CreateEventRecordTest)
                                                          counterUnits,
                                                          deviceUid,
                                                          counterSetUid);
-    BOOST_ASSERT(counter);
+    ARMNN_ASSERT(counter);
 
     // Create an event record
     SendCounterPacket::EventRecord eventRecord;
@@ -566,33 +567,39 @@ BOOST_AUTO_TEST_CASE(CreateEventRecordTest)
         eventRecord[3],
         eventRecord[4]
     };
+
     BOOST_CHECK(eventRecordWord0[0] == maxCounterUid); // max_counter_uid
     BOOST_CHECK(eventRecordWord0[1] == counterUid); // counter_uid
     BOOST_CHECK(eventRecordWord1[0] == deviceUid); // device
+
     BOOST_CHECK(eventRecordWord1[1] == counterSetUid); // counter_set
     BOOST_CHECK(eventRecordWord2[0] == counterClass); // class
     BOOST_CHECK(eventRecordWord2[1] == counterInterpolation); // interpolation
     BOOST_CHECK(std::memcmp(eventRecordWord34, &counterMultiplier, sizeof(counterMultiplier)) == 0); // multiplier
 
     ARMNN_NO_CONVERSION_WARN_BEGIN
-    uint32_t counterNameOffset = 0; // The name is the first item in pool
+    uint32_t eventRecordBlockSize = 8u * sizeof(uint32_t);
+    uint32_t counterNameOffset = eventRecordBlockSize; // The name is the first item in pool
     uint32_t counterDescriptionOffset = counterNameOffset + // Counter name offset
                                         4u + // Counter name length (uint32_t)
                                         counterName.size() + // 18u
                                         1u + // Null-terminator
                                         1u; // Rounding to the next word
+
     size_t counterUnitsOffset = counterDescriptionOffset + // Counter description offset
                                 4u + // Counter description length (uint32_t)
                                 counterDescription.size() + // 21u
                                 1u + // Null-terminator
-                                2u; // Rounding to the next word
+                                2u;  // Rounding to the next word
+
     ARMNN_NO_CONVERSION_WARN_END
 
     BOOST_CHECK(eventRecord[5] == counterNameOffset); // name_offset
     BOOST_CHECK(eventRecord[6] == counterDescriptionOffset); // description_offset
     BOOST_CHECK(eventRecord[7] == counterUnitsOffset); // units_offset
 
-    auto eventRecordPool = reinterpret_cast<unsigned char*>(eventRecord.data() + 8u); // The start of the pool
+    // Offsets are relative to the start of the eventRecord
+    auto eventRecordPool = reinterpret_cast<unsigned char*>(eventRecord.data());
     size_t uint32_t_size = sizeof(uint32_t);
 
     // The length of the SWTrace string (name)
@@ -631,10 +638,8 @@ BOOST_AUTO_TEST_CASE(CreateEventRecordTest)
 
 BOOST_AUTO_TEST_CASE(CreateEventRecordNoUnitsTest)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a counter for testing
     uint16_t counterUid = 44312;
@@ -657,7 +662,7 @@ BOOST_AUTO_TEST_CASE(CreateEventRecordNoUnitsTest)
                                                          "",
                                                          deviceUid,
                                                          counterSetUid);
-    BOOST_ASSERT(counter);
+    ARMNN_ASSERT(counter);
 
     // Create an event record
     SendCounterPacket::EventRecord eventRecord;
@@ -697,7 +702,8 @@ BOOST_AUTO_TEST_CASE(CreateEventRecordNoUnitsTest)
     BOOST_CHECK(std::memcmp(eventRecordWord34, &counterMultiplier, sizeof(counterMultiplier)) == 0); // multiplier
 
     ARMNN_NO_CONVERSION_WARN_BEGIN
-    uint32_t counterNameOffset = 0; // The name is the first item in pool
+    uint32_t eventRecordBlockSize = 8u * sizeof(uint32_t);
+    uint32_t counterNameOffset = eventRecordBlockSize; // The name is the first item in pool
     uint32_t counterDescriptionOffset = counterNameOffset + // Counter name offset
                                         4u + // Counter name length (uint32_t)
                                         counterName.size() + // 18u
@@ -709,7 +715,8 @@ BOOST_AUTO_TEST_CASE(CreateEventRecordNoUnitsTest)
     BOOST_CHECK(eventRecord[6] == counterDescriptionOffset); // description_offset
     BOOST_CHECK(eventRecord[7] == 0); // units_offset
 
-    auto eventRecordPool = reinterpret_cast<unsigned char*>(eventRecord.data() + 8u); // The start of the pool
+    // Offsets are relative to the start of the eventRecord
+    auto eventRecordPool = reinterpret_cast<unsigned char*>(eventRecord.data());
     size_t uint32_t_size = sizeof(uint32_t);
 
     // The length of the SWTrace string (name)
@@ -737,10 +744,8 @@ BOOST_AUTO_TEST_CASE(CreateEventRecordNoUnitsTest)
 
 BOOST_AUTO_TEST_CASE(CreateInvalidEventRecordTest1)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a counter for testing
     uint16_t counterUid = 7256;
@@ -764,7 +769,7 @@ BOOST_AUTO_TEST_CASE(CreateInvalidEventRecordTest1)
                                                          counterUnits,
                                                          deviceUid,
                                                          counterSetUid);
-    BOOST_ASSERT(counter);
+    ARMNN_ASSERT(counter);
 
     // Create an event record
     SendCounterPacket::EventRecord eventRecord;
@@ -778,10 +783,8 @@ BOOST_AUTO_TEST_CASE(CreateInvalidEventRecordTest1)
 
 BOOST_AUTO_TEST_CASE(CreateInvalidEventRecordTest2)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a counter for testing
     uint16_t counterUid = 7256;
@@ -805,7 +808,7 @@ BOOST_AUTO_TEST_CASE(CreateInvalidEventRecordTest2)
                                                          counterUnits,
                                                          deviceUid,
                                                          counterSetUid);
-    BOOST_ASSERT(counter);
+    ARMNN_ASSERT(counter);
 
     // Create an event record
     SendCounterPacket::EventRecord eventRecord;
@@ -819,10 +822,8 @@ BOOST_AUTO_TEST_CASE(CreateInvalidEventRecordTest2)
 
 BOOST_AUTO_TEST_CASE(CreateInvalidEventRecordTest3)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a counter for testing
     uint16_t counterUid = 7256;
@@ -846,7 +847,7 @@ BOOST_AUTO_TEST_CASE(CreateInvalidEventRecordTest3)
                                                          counterUnits,
                                                          deviceUid,
                                                          counterSetUid);
-    BOOST_ASSERT(counter);
+    ARMNN_ASSERT(counter);
 
     // Create an event record
     SendCounterPacket::EventRecord eventRecord;
@@ -860,17 +861,13 @@ BOOST_AUTO_TEST_CASE(CreateInvalidEventRecordTest3)
 
 BOOST_AUTO_TEST_CASE(CreateCategoryRecordTest)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a category for testing
     const std::string categoryName = "some_category";
-    uint16_t deviceUid = 1302;
-    uint16_t counterSetUid = 20734;
-    const CategoryPtr category = std::make_unique<Category>(categoryName, deviceUid, counterSetUid);
-    BOOST_ASSERT(category);
+    const CategoryPtr category = std::make_unique<Category>(categoryName);
+    ARMNN_ASSERT(category);
     category->m_Counters = { 11u, 23u, 5670u };
 
     // Create a collection of counters
@@ -914,9 +911,9 @@ BOOST_AUTO_TEST_CASE(CreateCategoryRecordTest)
     Counter* counter1 = counters.find(11)->second.get();
     Counter* counter2 = counters.find(23)->second.get();
     Counter* counter3 = counters.find(5670)->second.get();
-    BOOST_ASSERT(counter1);
-    BOOST_ASSERT(counter2);
-    BOOST_ASSERT(counter3);
+    ARMNN_ASSERT(counter1);
+    ARMNN_ASSERT(counter2);
+    ARMNN_ASSERT(counter3);
     uint16_t categoryEventCount = boost::numeric_cast<uint16_t>(counters.size());
 
     // Create a category record
@@ -926,36 +923,30 @@ BOOST_AUTO_TEST_CASE(CreateCategoryRecordTest)
 
     BOOST_CHECK(result);
     BOOST_CHECK(errorMessage.empty());
-    BOOST_CHECK(categoryRecord.size() == 80); // Size in words: header [4] + event pointer table [3] +
+    BOOST_CHECK(categoryRecord.size() == 79); // Size in words: header [3] + event pointer table [3] +
                                               //                category name [5] + event records [68 = 22 + 20 + 26]
 
-    uint16_t categoryRecordWord0[]
+    uint16_t categoryRecordWord1[]
     {
         static_cast<uint16_t>(categoryRecord[0] >> 16),
         static_cast<uint16_t>(categoryRecord[0])
     };
-    uint16_t categoryRecordWord1[]
-    {
-        static_cast<uint16_t>(categoryRecord[1] >> 16),
-        static_cast<uint16_t>(categoryRecord[1])
-    };
-    BOOST_CHECK(categoryRecordWord0[0] == deviceUid); // device
-    BOOST_CHECK(categoryRecordWord0[1] == counterSetUid); // counter_set
     BOOST_CHECK(categoryRecordWord1[0] == categoryEventCount); // event_count
     BOOST_CHECK(categoryRecordWord1[1] == 0); // reserved
 
     size_t uint32_t_size = sizeof(uint32_t);
 
     ARMNN_NO_CONVERSION_WARN_BEGIN
-    uint32_t eventPointerTableOffset = 0; // The event pointer table is the first item in pool
+    uint32_t categoryRecordBlockSize = 3u * uint32_t_size;
+    uint32_t eventPointerTableOffset = categoryRecordBlockSize; // The event pointer table is the first item in pool
     uint32_t categoryNameOffset = eventPointerTableOffset + // Event pointer table offset
                                   categoryEventCount * uint32_t_size; // The size of the event pointer table
     ARMNN_NO_CONVERSION_WARN_END
 
-    BOOST_CHECK(categoryRecord[2] == eventPointerTableOffset); // event_pointer_table_offset
-    BOOST_CHECK(categoryRecord[3] == categoryNameOffset); // name_offset
-
-    auto categoryRecordPool = reinterpret_cast<unsigned char*>(categoryRecord.data() + 4u); // The start of the pool
+    BOOST_CHECK(categoryRecord[1] == eventPointerTableOffset); // event_pointer_table_offset
+    BOOST_CHECK(categoryRecord[2] == categoryNameOffset); // name_offset
+    // Offsets are relative to the start of the category record
+    auto categoryRecordPool = reinterpret_cast<unsigned char*>(categoryRecord.data());
 
     // The event pointer table
     uint32_t eventRecord0Offset = categoryRecordPool[eventPointerTableOffset + 0 * uint32_t_size];
@@ -981,60 +972,71 @@ BOOST_AUTO_TEST_CASE(CreateCategoryRecordTest)
 
     // Counter1 UID and max counter UID
     uint16_t eventRecord0Word0[2] = { 0u, 0u };
-    std::memcpy(eventRecord0Word0, categoryRecordPool + eventRecord0Offset, sizeof(eventRecord0Word0));
+    std::memcpy(eventRecord0Word0, categoryRecordPool + categoryRecordBlockSize + eventRecord0Offset,
+                sizeof(eventRecord0Word0));
     BOOST_CHECK(eventRecord0Word0[0] == counter1->m_Uid);
     BOOST_CHECK(eventRecord0Word0[1] == counter1->m_MaxCounterUid);
 
     // Counter1 name
     uint32_t counter1NameOffset = 0;
-    std::memcpy(&counter1NameOffset, categoryRecordPool + eventRecord0Offset + 5u * uint32_t_size, uint32_t_size);
+    std::memcpy(&counter1NameOffset, categoryRecordPool  + eventRecord0Offset + 5u * uint32_t_size, uint32_t_size);
     BOOST_CHECK(counter1NameOffset == 0);
     // The length of the SWTrace string (name)
-    BOOST_CHECK(categoryRecordPool[eventRecord0Offset + // Offset to the event record
-                                   8u * uint32_t_size + // Offset to the event record pool
-                                   counter1NameOffset   // Offset to the name of the counter
+    BOOST_CHECK(categoryRecordPool[eventRecord0Offset +       // Offset to the event record
+                                   categoryRecordBlockSize  + // Offset to the end of the category record block
+                                   8u * uint32_t_size +       // Offset to the event record pool
+                                   counter1NameOffset         // Offset to the name of the counter
                                   ] == counter1->m_Name.size() + 1); // The length of the name including the
                                                                      // null-terminator
     // The counter1 name
-    BOOST_CHECK(std::memcmp(categoryRecordPool + // The beginning of the category pool
-                            eventRecord0Offset + // Offset to the event record
-                            8u * uint32_t_size + // Offset to the event record pool
-                            counter1NameOffset + // Offset to the name of the counter
-                            uint32_t_size,       // The length of the name
+    BOOST_CHECK(std::memcmp(categoryRecordPool +      // The beginning of the category pool
+                            categoryRecordBlockSize + // Offset to the end of the category record block
+                            eventRecord0Offset +      // Offset to the event record
+                            8u * uint32_t_size +      // Offset to the event record pool
+                            counter1NameOffset +      // Offset to the name of the counter
+                            uint32_t_size,            // The length of the name
                             counter1->m_Name.data(),
                             counter1->m_Name.size()) == 0); // name
     // The null-terminator at the end of the counter1 name
-    BOOST_CHECK(categoryRecordPool[eventRecord0Offset +    // Offset to the event record
-                                   8u * uint32_t_size +    // Offset to the event record pool
-                                   counter1NameOffset +    // Offset to the name of the counter
-                                   uint32_t_size +         // The length of the name
-                                   counter1->m_Name.size() // The name of the counter
+    BOOST_CHECK(categoryRecordPool[eventRecord0Offset +      // Offset to the event record
+                                   categoryRecordBlockSize + // Offset to the end of the category record block
+                                   8u * uint32_t_size +      // Offset to the event record pool
+                                   counter1NameOffset +      // Offset to the name of the counter
+                                   uint32_t_size +           // The length of the name
+                                   counter1->m_Name.size()   // The name of the counter
                                    ] == '\0');
 
     // Counter2 name
     uint32_t counter2NameOffset = 0;
-    std::memcpy(&counter2NameOffset, categoryRecordPool + eventRecord1Offset + 5u * uint32_t_size, uint32_t_size);
-    BOOST_CHECK(counter2NameOffset == 0);
+    std::memcpy(&counter2NameOffset, categoryRecordPool +
+                                     categoryRecordBlockSize +
+                                     eventRecord1Offset +
+                                     5u * uint32_t_size,
+                                     uint32_t_size);
+    BOOST_CHECK(counter2NameOffset == 8u * uint32_t_size );
     // The length of the SWTrace string (name)
+
     BOOST_CHECK(categoryRecordPool[eventRecord1Offset + // Offset to the event record
-                                   8u * uint32_t_size + // Offset to the event record pool
+                                   categoryRecordBlockSize +
                                    counter2NameOffset   // Offset to the name of the counter
                                   ] == counter2->m_Name.size() + 1); // The length of the name including the
                                                                      // null-terminator
     // The counter2 name
-    BOOST_CHECK(std::memcmp(categoryRecordPool + // The beginning of the category pool
-                            eventRecord1Offset + // Offset to the event record
-                            8u * uint32_t_size + // Offset to the event record pool
-                            counter2NameOffset + // Offset to the name of the counter
-                            uint32_t_size,       // The length of the name
+    BOOST_CHECK(std::memcmp(categoryRecordPool +      // The beginning of the category pool
+                            categoryRecordBlockSize + // Offset to the end of the category record block
+                            eventRecord1Offset +      // Offset to the event record
+                            counter2NameOffset +      // Offset to the name of the counter
+                            uint32_t_size,            // The length of the name
                             counter2->m_Name.data(),
                             counter2->m_Name.size()) == 0); // name
+
+
     // The null-terminator at the end of the counter2 name
-    BOOST_CHECK(categoryRecordPool[eventRecord1Offset +    // Offset to the event record
-                                   8u * uint32_t_size +    // Offset to the event record pool
-                                   counter2NameOffset +    // Offset to the name of the counter
-                                   uint32_t_size +         // The length of the name
-                                   counter2->m_Name.size() // The name of the counter
+    BOOST_CHECK(categoryRecordPool[eventRecord1Offset +      // Offset to the event record
+                                   categoryRecordBlockSize + // Offset to the end of the category record block
+                                   counter2NameOffset +      // Offset to the name of the counter
+                                   uint32_t_size +           // The length of the name
+                                   counter2->m_Name.size()   // The name of the counter
                                    ] == '\0');
 
     // Counter3 name
@@ -1043,12 +1045,14 @@ BOOST_AUTO_TEST_CASE(CreateCategoryRecordTest)
     BOOST_CHECK(counter3NameOffset == 0);
     // The length of the SWTrace string (name)
     BOOST_CHECK(categoryRecordPool[eventRecord2Offset + // Offset to the event record
+                                   categoryRecordBlockSize +
                                    8u * uint32_t_size + // Offset to the event record pool
                                    counter3NameOffset   // Offset to the name of the counter
                                   ] == counter3->m_Name.size() + 1); // The length of the name including the
                                                                      // null-terminator
     // The counter3 name
     BOOST_CHECK(std::memcmp(categoryRecordPool + // The beginning of the category pool
+                            categoryRecordBlockSize +
                             eventRecord2Offset + // Offset to the event record
                             8u * uint32_t_size + // Offset to the event record pool
                             counter3NameOffset + // Offset to the name of the counter
@@ -1057,6 +1061,7 @@ BOOST_AUTO_TEST_CASE(CreateCategoryRecordTest)
                             counter3->m_Name.size()) == 0); // name
     // The null-terminator at the end of the counter3 name
     BOOST_CHECK(categoryRecordPool[eventRecord2Offset +    // Offset to the event record
+                                   categoryRecordBlockSize +
                                    8u * uint32_t_size +    // Offset to the event record pool
                                    counter3NameOffset +    // Offset to the name of the counter
                                    uint32_t_size +         // The length of the name
@@ -1066,16 +1071,12 @@ BOOST_AUTO_TEST_CASE(CreateCategoryRecordTest)
 
 BOOST_AUTO_TEST_CASE(CreateInvalidCategoryRecordTest1)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a category for testing
     const std::string categoryName = "some invalid category";
-    uint16_t deviceUid = 1302;
-    uint16_t counterSetUid = 20734;
-    const CategoryPtr category = std::make_unique<Category>(categoryName, deviceUid, counterSetUid);
+    const CategoryPtr category = std::make_unique<Category>(categoryName);
     BOOST_CHECK(category);
 
     // Create a category record
@@ -1091,16 +1092,12 @@ BOOST_AUTO_TEST_CASE(CreateInvalidCategoryRecordTest1)
 
 BOOST_AUTO_TEST_CASE(CreateInvalidCategoryRecordTest2)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     MockBufferManager mockBuffer(0);
-    SendCounterPacketTest sendCounterPacketTest(profilingStateMachine, mockBuffer);
+    SendCounterPacketTest sendCounterPacketTest(mockBuffer);
 
     // Create a category for testing
     const std::string categoryName = "some_category";
-    uint16_t deviceUid = 1302;
-    uint16_t counterSetUid = 20734;
-    const CategoryPtr category = std::make_unique<Category>(categoryName, deviceUid, counterSetUid);
+    const CategoryPtr category = std::make_unique<Category>(categoryName);
     BOOST_CHECK(category);
     category->m_Counters = { 11u, 23u, 5670u };
 
@@ -1134,8 +1131,6 @@ BOOST_AUTO_TEST_CASE(CreateInvalidCategoryRecordTest2)
 
 BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest1)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     // The counter directory used for testing
     CounterDirectory counterDirectory;
 
@@ -1155,15 +1150,13 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest1)
 
     // Buffer with not enough space
     MockBufferManager mockBuffer(10);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockBuffer);
+    SendCounterPacket sendCounterPacket(mockBuffer);
     BOOST_CHECK_THROW(sendCounterPacket.SendCounterDirectoryPacket(counterDirectory),
                       armnn::profiling::BufferExhaustion);
 }
 
 BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     // The counter directory used for testing
     CounterDirectory counterDirectory;
 
@@ -1191,20 +1184,19 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
     // Register a category associated to "device1" and "counterset1"
     const std::string category1Name = "category1";
     const Category* category1 = nullptr;
-    BOOST_CHECK_NO_THROW(category1 = counterDirectory.RegisterCategory(category1Name,
-                                                                       device1->m_Uid,
-                                                                       counterSet1->m_Uid));
+    BOOST_CHECK_NO_THROW(category1 = counterDirectory.RegisterCategory(category1Name));
     BOOST_CHECK(counterDirectory.GetCategoryCount() == 1);
     BOOST_CHECK(category1);
 
     // Register a category not associated to "device2" but no counter set
     const std::string category2Name = "category2";
     const Category* category2 = nullptr;
-    BOOST_CHECK_NO_THROW(category2 = counterDirectory.RegisterCategory(category2Name,
-                                                                       device2->m_Uid));
+    BOOST_CHECK_NO_THROW(category2 = counterDirectory.RegisterCategory(category2Name));
     BOOST_CHECK(counterDirectory.GetCategoryCount() == 2);
     BOOST_CHECK(category2);
 
+    uint16_t numberOfCores = 4;
+
     // Register a counter associated to "category1"
     const Counter* counter1 = nullptr;
     BOOST_CHECK_NO_THROW(counter1 = counterDirectory.RegisterCounter(armnn::profiling::BACKEND_ID,
@@ -1215,8 +1207,9 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
                                                                      123.45f,
                                                                      "counter1",
                                                                      "counter1description",
-                                                                     std::string("counter1units")));
-    BOOST_CHECK(counterDirectory.GetCounterCount() == 3);
+                                                                     std::string("counter1units"),
+                                                                     numberOfCores));
+    BOOST_CHECK(counterDirectory.GetCounterCount() == 4);
     BOOST_CHECK(counter1);
 
     // Register a counter associated to "category1"
@@ -1233,7 +1226,7 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
                                                                      armnn::EmptyOptional(),
                                                                      device2->m_Uid,
                                                                      0));
-    BOOST_CHECK(counterDirectory.GetCounterCount() == 4);
+    BOOST_CHECK(counterDirectory.GetCounterCount() == 5);
     BOOST_CHECK(counter2);
 
     // Register a counter associated to "category2"
@@ -1247,7 +1240,7 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
                                                                      "counter3",
                                                                      "counter3description",
                                                                      armnn::EmptyOptional(),
-                                                                     5,
+                                                                     numberOfCores,
                                                                      device2->m_Uid,
                                                                      counterSet1->m_Uid));
     BOOST_CHECK(counterDirectory.GetCounterCount() == 9);
@@ -1255,59 +1248,55 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
 
     // Buffer with enough space
     MockBufferManager mockBuffer(1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockBuffer);
+    SendCounterPacket sendCounterPacket(mockBuffer);
     BOOST_CHECK_NO_THROW(sendCounterPacket.SendCounterDirectoryPacket(counterDirectory));
 
     // Get the readable buffer
     auto readBuffer = mockBuffer.GetReadableBuffer();
 
     // Check the packet header
-    uint32_t packetHeaderWord0 = ReadUint32(readBuffer, 0);
-    uint32_t packetHeaderWord1 = ReadUint32(readBuffer, 4);
+    const uint32_t packetHeaderWord0 = ReadUint32(readBuffer, 0);
+    const uint32_t packetHeaderWord1 = ReadUint32(readBuffer, 4);
     BOOST_TEST(((packetHeaderWord0 >> 26) & 0x3F) == 0);  // packet_family
     BOOST_TEST(((packetHeaderWord0 >> 16) & 0x3FF) == 2); // packet_id
-    BOOST_TEST(packetHeaderWord1 == 936);                 // data_length
+    BOOST_TEST(packetHeaderWord1 == 432);                 // data_length
 
     // Check the body header
-    uint32_t bodyHeaderWord0 = ReadUint32(readBuffer,  8);
-    uint32_t bodyHeaderWord1 = ReadUint32(readBuffer, 12);
-    uint32_t bodyHeaderWord2 = ReadUint32(readBuffer, 16);
-    uint32_t bodyHeaderWord3 = ReadUint32(readBuffer, 20);
-    uint32_t bodyHeaderWord4 = ReadUint32(readBuffer, 24);
-    uint32_t bodyHeaderWord5 = ReadUint32(readBuffer, 28);
-    uint16_t deviceRecordCount     = static_cast<uint16_t>(bodyHeaderWord0 >> 16);
-    uint16_t counterSetRecordCount = static_cast<uint16_t>(bodyHeaderWord2 >> 16);
-    uint16_t categoryRecordCount   = static_cast<uint16_t>(bodyHeaderWord4 >> 16);
-    BOOST_TEST(deviceRecordCount == 2);     // device_records_count
-    BOOST_TEST(bodyHeaderWord1 == 0);       // device_records_pointer_table_offset
-    BOOST_TEST(counterSetRecordCount == 1); // counter_set_count
-    BOOST_TEST(bodyHeaderWord3 == 8);       // counter_set_pointer_table_offset
-    BOOST_TEST(categoryRecordCount == 2);   // categories_count
-    BOOST_TEST(bodyHeaderWord5 == 12);      // categories_pointer_table_offset
+    const uint32_t bodyHeaderWord0 = ReadUint32(readBuffer,  8);
+    const uint32_t bodyHeaderWord1 = ReadUint32(readBuffer, 12);
+    const uint32_t bodyHeaderWord2 = ReadUint32(readBuffer, 16);
+    const uint32_t bodyHeaderWord3 = ReadUint32(readBuffer, 20);
+    const uint32_t bodyHeaderWord4 = ReadUint32(readBuffer, 24);
+    const uint32_t bodyHeaderWord5 = ReadUint32(readBuffer, 28);
+    const uint16_t deviceRecordCount     = static_cast<uint16_t>(bodyHeaderWord0 >> 16);
+    const uint16_t counterSetRecordCount = static_cast<uint16_t>(bodyHeaderWord2 >> 16);
+    const uint16_t categoryRecordCount   = static_cast<uint16_t>(bodyHeaderWord4 >> 16);
+    BOOST_TEST(deviceRecordCount == 2);                      // device_records_count
+    BOOST_TEST(bodyHeaderWord1 == bodyHeaderSize * 4);           // device_records_pointer_table_offset
+    BOOST_TEST(counterSetRecordCount == 1);                  // counter_set_count
+    BOOST_TEST(bodyHeaderWord3 == 8 + bodyHeaderSize * 4);       // counter_set_pointer_table_offset
+    BOOST_TEST(categoryRecordCount == 2);                    // categories_count
+    BOOST_TEST(bodyHeaderWord5 == 12 + bodyHeaderSize * 4);      // categories_pointer_table_offset
 
     // Check the device records pointer table
-    uint32_t deviceRecordOffset0 = ReadUint32(readBuffer, 32);
-    uint32_t deviceRecordOffset1 = ReadUint32(readBuffer, 36);
-    BOOST_TEST(deviceRecordOffset0 ==  0); // Device record offset for "device1"
-    BOOST_TEST(deviceRecordOffset1 == 20); // Device record offset for "device2"
+    const uint32_t deviceRecordOffset0 = ReadUint32(readBuffer, 32);
+    const uint32_t deviceRecordOffset1 = ReadUint32(readBuffer, 36);
+    BOOST_TEST(deviceRecordOffset0 == 20); // Device record offset for "device1"
+    BOOST_TEST(deviceRecordOffset1 == 40); // Device record offset for "device2"
 
     // Check the counter set pointer table
-    uint32_t counterSetRecordOffset0 = ReadUint32(readBuffer, 40);
-    BOOST_TEST(counterSetRecordOffset0 == 40); // Counter set record offset for "counterset1"
+    const uint32_t counterSetRecordOffset0 = ReadUint32(readBuffer, 40);
+    BOOST_TEST(counterSetRecordOffset0 == 52); // Counter set record offset for "counterset1"
 
     // Check the category pointer table
-    uint32_t categoryRecordOffset0 = ReadUint32(readBuffer, 44);
-    uint32_t categoryRecordOffset1 = ReadUint32(readBuffer, 48);
-    BOOST_TEST(categoryRecordOffset0 ==  64); // Category record offset for "category1"
-    BOOST_TEST(categoryRecordOffset1 == 476); // Category record offset for "category2"
+    const uint32_t categoryRecordOffset0 = ReadUint32(readBuffer, 44);
+    const uint32_t categoryRecordOffset1 = ReadUint32(readBuffer, 48);
+    BOOST_TEST(categoryRecordOffset0 ==  72); // Category record offset for "category1"
+    BOOST_TEST(categoryRecordOffset1 == 176); // Category record offset for "category2"
 
     // Get the device record pool offset
-    uint32_t uint32_t_size = sizeof(uint32_t);
-    uint32_t packetBodyPoolOffset = 2u * uint32_t_size +                    // packet_header
-                                    6u * uint32_t_size +                    // body_header
-                                    deviceRecordCount * uint32_t_size +     // Size of device_records_pointer_table
-                                    counterSetRecordCount * uint32_t_size + // Size of counter_set_pointer_table
-                                    categoryRecordCount * uint32_t_size;    // Size of categories_pointer_table
+    const uint32_t uint32_t_size = sizeof(uint32_t);
+    const uint32_t packetHeaderSize = 2u * uint32_t_size;
 
     // Device record structure/collection used for testing
     struct DeviceRecord
@@ -1319,30 +1308,40 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
         std::string name;
     };
     std::vector<DeviceRecord> deviceRecords;
-    uint32_t deviceRecordsPointerTableOffset = 2u * uint32_t_size + // packet_header
-                                               6u * uint32_t_size + // body_header
-                                               bodyHeaderWord1;     // device_records_pointer_table_offset
+    const uint32_t deviceRecordsPointerTableOffset = packetHeaderSize +
+                                                     bodyHeaderWord1;     // device_records_pointer_table_offset
 
     const unsigned char* readData = readBuffer->GetReadableData();
 
-    for (uint32_t i = 0; i < deviceRecordCount; i++)
+    uint32_t offset = 0;
+    std::vector<uint32_t> data(800);
+
+    for (uint32_t i = 0; i < 800; i+=uint32_t_size)
     {
-        // Get the device record offset
-        uint32_t deviceRecordOffset = ReadUint32(readBuffer, deviceRecordsPointerTableOffset + i * uint32_t_size);
+        data[i] = ReadUint32(readBuffer, offset);
+        offset += uint32_t_size;
+    }
+
+    std::vector<uint32_t> deviceRecordOffsets(deviceRecordCount);
+     offset = deviceRecordsPointerTableOffset;
+    for (uint32_t i = 0; i < deviceRecordCount; ++i)
+    {
+        // deviceRecordOffset is relative to the start of the deviceRecordsPointerTable
+        deviceRecordOffsets[i] = ReadUint32(readBuffer, offset) + deviceRecordsPointerTableOffset;
+        offset += uint32_t_size;
+    }
 
+    for (uint32_t i = 0; i < deviceRecordCount; i++)
+    {
         // Collect the data for the device record
-        uint32_t deviceRecordWord0 = ReadUint32(readBuffer,
-                                                packetBodyPoolOffset + deviceRecordOffset + 0 * uint32_t_size);
-        uint32_t deviceRecordWord1 = ReadUint32(readBuffer,
-                                                packetBodyPoolOffset + deviceRecordOffset + 1 * uint32_t_size);
+        const uint32_t deviceRecordWord0 = ReadUint32(readBuffer, deviceRecordOffsets[i] + 0 * uint32_t_size);
+        const uint32_t deviceRecordWord1 = ReadUint32(readBuffer, deviceRecordOffsets[i] + 1 * uint32_t_size);
         DeviceRecord deviceRecord;
         deviceRecord.uid = static_cast<uint16_t>(deviceRecordWord0 >> 16); // uid
         deviceRecord.cores = static_cast<uint16_t>(deviceRecordWord0);     // cores
         deviceRecord.name_offset = deviceRecordWord1;                      // name_offset
 
-        uint32_t deviceRecordPoolOffset = packetBodyPoolOffset +    // Packet body offset
-                                          deviceRecordOffset +      // Device record offset
-                                          2 * uint32_t_size +       // Device record header
+        uint32_t deviceRecordPoolOffset = deviceRecordOffsets[i] +                  // Packet body offset
                                           deviceRecord.name_offset; // Device name offset
         uint32_t deviceRecordNameLength = ReadUint32(readBuffer, deviceRecordPoolOffset);
         deviceRecord.name_length = deviceRecordNameLength; // name_length
@@ -1368,6 +1367,7 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
         BOOST_CHECK(device->m_Name  == deviceRecord.name);
     }
 
+
     // Counter set record structure/collection used for testing
     struct CounterSetRecord
     {
@@ -1378,28 +1378,30 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
         std::string name;
     };
     std::vector<CounterSetRecord> counterSetRecords;
-    uint32_t counterSetRecordsPointerTableOffset = 2u * uint32_t_size + // packet_header
-                                                   6u * uint32_t_size + // body_header
-                                                   bodyHeaderWord3;     // counter_set_pointer_table_offset
-    for (uint32_t i = 0; i < counterSetRecordCount; i++)
+    const uint32_t counterSetRecordsPointerTableOffset = 2u * uint32_t_size + // packet_header
+                                                         bodyHeaderWord3;     // counter_set_pointer_table_offset
+
+    offset = counterSetRecordsPointerTableOffset;
+    std::vector<uint32_t> counterSetRecordOffsets(counterSetRecordCount);
+
+    for (uint32_t i = 0; i < counterSetRecordCount; ++i)
     {
-        // Get the counter set record offset
-        uint32_t counterSetRecordOffset = ReadUint32(readBuffer,
-                                                     counterSetRecordsPointerTableOffset + i * uint32_t_size);
+        // counterSetRecordOffset is relative to the start of the counterSetRecordsPointerTable
+        counterSetRecordOffsets[i] = ReadUint32(readBuffer, offset) + counterSetRecordsPointerTableOffset;
+        offset += uint32_t_size;
+    }
 
+    for (uint32_t i = 0; i < counterSetRecordCount; i++)
+    {
         // Collect the data for the counter set record
-        uint32_t counterSetRecordWord0 = ReadUint32(readBuffer,
-                                                    packetBodyPoolOffset + counterSetRecordOffset + 0 * uint32_t_size);
-        uint32_t counterSetRecordWord1 = ReadUint32(readBuffer,
-                                                    packetBodyPoolOffset + counterSetRecordOffset + 1 * uint32_t_size);
+        const uint32_t counterSetRecordWord0 = ReadUint32(readBuffer, counterSetRecordOffsets[i] + 0 * uint32_t_size);
+        const uint32_t counterSetRecordWord1 = ReadUint32(readBuffer, counterSetRecordOffsets[i] + 1 * uint32_t_size);
         CounterSetRecord counterSetRecord;
         counterSetRecord.uid = static_cast<uint16_t>(counterSetRecordWord0 >> 16); // uid
         counterSetRecord.count = static_cast<uint16_t>(counterSetRecordWord0);     // count
         counterSetRecord.name_offset = counterSetRecordWord1;                      // name_offset
 
-        uint32_t counterSetRecordPoolOffset = packetBodyPoolOffset +        // Packet body offset
-                                              counterSetRecordOffset +      // Counter set record offset
-                                              2 * uint32_t_size +           // Counter set record header
+        uint32_t counterSetRecordPoolOffset = counterSetRecordOffsets[i]  +                 // Packet body offset
                                               counterSetRecord.name_offset; // Counter set name offset
         uint32_t counterSetRecordNameLength = ReadUint32(readBuffer, counterSetRecordPoolOffset);
         counterSetRecord.name_length = counterSetRecordNameLength; // name_length
@@ -1448,8 +1450,6 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
     // Category record structure/collection used for testing
     struct CategoryRecord
     {
-        uint16_t                 device;
-        uint16_t                 counter_set;
         uint16_t                 event_count;
         uint32_t                 event_pointer_table_offset;
         uint32_t                 name_offset;
@@ -1459,40 +1459,35 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
         std::vector<EventRecord> event_records;
     };
     std::vector<CategoryRecord> categoryRecords;
-    uint32_t categoryRecordsPointerTableOffset = 2u * uint32_t_size + // packet_header
-                                                 6u * uint32_t_size + // body_header
-                                                 bodyHeaderWord5;     // categories_pointer_table_offset
-    for (uint32_t i = 0; i < categoryRecordCount; i++)
+    const uint32_t categoryRecordsPointerTableOffset = 2u * uint32_t_size + // packet_header
+                                                       bodyHeaderWord5;    // categories_pointer_table_offset
+
+    offset = categoryRecordsPointerTableOffset;
+    std::vector<uint32_t> categoryRecordOffsets(categoryRecordCount);
+    for (uint32_t i = 0; i < categoryRecordCount; ++i)
     {
-        // Get the category record offset
-        uint32_t categoryRecordOffset = ReadUint32(readBuffer, categoryRecordsPointerTableOffset + i * uint32_t_size);
+        // categoryRecordOffset is relative to the start of the categoryRecordsPointerTable
+        categoryRecordOffsets[i] = ReadUint32(readBuffer, offset) + categoryRecordsPointerTableOffset;
+        offset += uint32_t_size;
+    }
 
+    for (uint32_t i = 0; i < categoryRecordCount; i++)
+    {
         // Collect the data for the category record
-        uint32_t categoryRecordWord0 = ReadUint32(readBuffer,
-                                                  packetBodyPoolOffset + categoryRecordOffset + 0 * uint32_t_size);
-        uint32_t categoryRecordWord1 = ReadUint32(readBuffer,
-                                                  packetBodyPoolOffset + categoryRecordOffset + 1 * uint32_t_size);
-        uint32_t categoryRecordWord2 = ReadUint32(readBuffer,
-                                                  packetBodyPoolOffset + categoryRecordOffset + 2 * uint32_t_size);
-        uint32_t categoryRecordWord3 = ReadUint32(readBuffer,
-                                                  packetBodyPoolOffset + categoryRecordOffset + 3 * uint32_t_size);
+        const uint32_t categoryRecordWord1 = ReadUint32(readBuffer, categoryRecordOffsets[i] + 0 * uint32_t_size);
+        const uint32_t categoryRecordWord2 = ReadUint32(readBuffer, categoryRecordOffsets[i] + 1 * uint32_t_size);
+        const uint32_t categoryRecordWord3 = ReadUint32(readBuffer, categoryRecordOffsets[i] + 2 * uint32_t_size);
         CategoryRecord categoryRecord;
-        categoryRecord.device = static_cast<uint16_t>(categoryRecordWord0 >> 16);      // device
-        categoryRecord.counter_set = static_cast<uint16_t>(categoryRecordWord0);       // counter_set
         categoryRecord.event_count = static_cast<uint16_t>(categoryRecordWord1 >> 16); // event_count
         categoryRecord.event_pointer_table_offset = categoryRecordWord2;               // event_pointer_table_offset
         categoryRecord.name_offset = categoryRecordWord3;                              // name_offset
 
-        uint32_t categoryRecordPoolOffset = packetBodyPoolOffset +      // Packet body offset
-                                            categoryRecordOffset +      // Category record offset
-                                            4 * uint32_t_size;          // Category record header
-
         uint32_t categoryRecordNameLength = ReadUint32(readBuffer,
-                                                       categoryRecordPoolOffset + categoryRecord.name_offset);
+                                                       categoryRecordOffsets[i] + categoryRecord.name_offset);
         categoryRecord.name_length = categoryRecordNameLength; // name_length
         unsigned char categoryRecordNameNullTerminator =
                 ReadUint8(readBuffer,
-                          categoryRecordPoolOffset +
+                          categoryRecordOffsets[i] +
                           categoryRecord.name_offset +
                           uint32_t_size +
                           categoryRecordNameLength - 1); // name null-terminator
@@ -1500,36 +1495,35 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
         std::vector<unsigned char> categoryRecordNameBuffer(categoryRecord.name_length - 1);
         std::memcpy(categoryRecordNameBuffer.data(),
                     readData +
-                    categoryRecordPoolOffset +
+                    categoryRecordOffsets[i] +
                     categoryRecord.name_offset +
                     uint32_t_size,
                     categoryRecordNameBuffer.size());
         categoryRecord.name.assign(categoryRecordNameBuffer.begin(), categoryRecordNameBuffer.end()); // name
 
         categoryRecord.event_pointer_table.resize(categoryRecord.event_count);
-        for (uint32_t eventIndex = 0; eventIndex < categoryRecord.event_count; eventIndex++)
+        offset = categoryRecordOffsets[i] + categoryRecord.event_pointer_table_offset;
+        for (uint32_t eventOffsetIndex = 0; eventOffsetIndex < categoryRecord.event_count; ++eventOffsetIndex)
         {
-            uint32_t eventRecordOffset = ReadUint32(readBuffer,
-                                                    categoryRecordPoolOffset +
-                                                    categoryRecord.event_pointer_table_offset +
-                                                    eventIndex * uint32_t_size);
-            categoryRecord.event_pointer_table[eventIndex] = eventRecordOffset;
+            // eventRecordOffset is relative to the start of the event pointer table
+            categoryRecord.event_pointer_table[eventOffsetIndex] = ReadUint32(readBuffer, offset) +
+                                                                   categoryRecordOffsets[i] +
+                                                                   categoryRecord.event_pointer_table_offset;
+            offset += uint32_t_size;
+        }
 
+        for (uint32_t eventIndex = 0; eventIndex < categoryRecord.event_count; eventIndex++)
+        {
+            const uint32_t eventOffset = categoryRecord.event_pointer_table[eventIndex];
             // Collect the data for the event record
-            uint32_t eventRecordWord0  = ReadUint32(readBuffer,
-                                                    categoryRecordPoolOffset + eventRecordOffset + 0 * uint32_t_size);
-            uint32_t eventRecordWord1  = ReadUint32(readBuffer,
-                                                    categoryRecordPoolOffset + eventRecordOffset + 1 * uint32_t_size);
-            uint32_t eventRecordWord2  = ReadUint32(readBuffer,
-                                                    categoryRecordPoolOffset + eventRecordOffset + 2 * uint32_t_size);
-            uint64_t eventRecordWord34 = ReadUint64(readBuffer,
-                                                    categoryRecordPoolOffset + eventRecordOffset + 3 * uint32_t_size);
-            uint32_t eventRecordWord5 =  ReadUint32(readBuffer,
-                                                    categoryRecordPoolOffset + eventRecordOffset + 5 * uint32_t_size);
-            uint32_t eventRecordWord6 = ReadUint32(readBuffer,
-                                                   categoryRecordPoolOffset + eventRecordOffset + 6 * uint32_t_size);
-            uint32_t eventRecordWord7 = ReadUint32(readBuffer,
-                                                   categoryRecordPoolOffset + eventRecordOffset + 7 * uint32_t_size);
+            const uint32_t eventRecordWord0  = ReadUint32(readBuffer, eventOffset + 0 * uint32_t_size);
+            const uint32_t eventRecordWord1  = ReadUint32(readBuffer, eventOffset + 1 * uint32_t_size);
+            const uint32_t eventRecordWord2  = ReadUint32(readBuffer, eventOffset + 2 * uint32_t_size);
+            const uint64_t eventRecordWord34 = ReadUint64(readBuffer, eventOffset + 3 * uint32_t_size);
+            const uint32_t eventRecordWord5  = ReadUint32(readBuffer, eventOffset + 5 * uint32_t_size);
+            const uint32_t eventRecordWord6  = ReadUint32(readBuffer, eventOffset + 6 * uint32_t_size);
+            const uint32_t eventRecordWord7  = ReadUint32(readBuffer, eventOffset + 7 * uint32_t_size);
+
             EventRecord eventRecord;
             eventRecord.counter_uid = static_cast<uint16_t>(eventRecordWord0);                     // counter_uid
             eventRecord.max_counter_uid = static_cast<uint16_t>(eventRecordWord0 >> 16);           // max_counter_uid
@@ -1542,16 +1536,11 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
             eventRecord.description_offset = static_cast<uint32_t>(eventRecordWord6);              // description_offset
             eventRecord.units_offset = static_cast<uint32_t>(eventRecordWord7);                    // units_offset
 
-            uint32_t eventRecordPoolOffset = categoryRecordPoolOffset + // Category record pool offset
-                                             eventRecordOffset +        // Event record offset
-                                             8 * uint32_t_size;         // Event record header
-
-            uint32_t eventRecordNameLength = ReadUint32(readBuffer,
-                                                        eventRecordPoolOffset + eventRecord.name_offset);
+            uint32_t eventRecordNameLength = ReadUint32(readBuffer, eventOffset + eventRecord.name_offset);
             eventRecord.name_length = eventRecordNameLength; // name_length
             unsigned char eventRecordNameNullTerminator =
                     ReadUint8(readBuffer,
-                              eventRecordPoolOffset +
+                              eventOffset +
                               eventRecord.name_offset +
                               uint32_t_size +
                               eventRecordNameLength - 1); // name null-terminator
@@ -1559,18 +1548,18 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
             std::vector<unsigned char> eventRecordNameBuffer(eventRecord.name_length - 1);
             std::memcpy(eventRecordNameBuffer.data(),
                         readData +
-                        eventRecordPoolOffset +
+                        eventOffset +
                         eventRecord.name_offset +
                         uint32_t_size,
                         eventRecordNameBuffer.size());
             eventRecord.name.assign(eventRecordNameBuffer.begin(), eventRecordNameBuffer.end()); // name
 
             uint32_t eventRecordDescriptionLength = ReadUint32(readBuffer,
-                                                               eventRecordPoolOffset + eventRecord.description_offset);
+                                                               eventOffset + eventRecord.description_offset);
             eventRecord.description_length = eventRecordDescriptionLength; // description_length
             unsigned char eventRecordDescriptionNullTerminator =
                     ReadUint8(readBuffer,
-                              eventRecordPoolOffset +
+                              eventOffset +
                               eventRecord.description_offset +
                               uint32_t_size +
                               eventRecordDescriptionLength - 1); // description null-terminator
@@ -1578,7 +1567,7 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
             std::vector<unsigned char> eventRecordDescriptionBuffer(eventRecord.description_length - 1);
             std::memcpy(eventRecordDescriptionBuffer.data(),
                         readData +
-                        eventRecordPoolOffset +
+                        eventOffset +
                         eventRecord.description_offset +
                         uint32_t_size,
                         eventRecordDescriptionBuffer.size());
@@ -1588,11 +1577,11 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
             if (eventRecord.units_offset > 0)
             {
                 uint32_t eventRecordUnitsLength = ReadUint32(readBuffer,
-                                                             eventRecordPoolOffset + eventRecord.units_offset);
+                                                             eventOffset + eventRecord.units_offset);
                 eventRecord.units_length = eventRecordUnitsLength; // units_length
                 unsigned char eventRecordUnitsNullTerminator =
                         ReadUint8(readBuffer,
-                                  eventRecordPoolOffset +
+                                  eventOffset +
                                   eventRecord.units_offset +
                                   uint32_t_size +
                                   eventRecordUnitsLength - 1); // units null-terminator
@@ -1600,7 +1589,7 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
                 std::vector<unsigned char> eventRecordUnitsBuffer(eventRecord.units_length - 1);
                 std::memcpy(eventRecordUnitsBuffer.data(),
                             readData +
-                            eventRecordPoolOffset +
+                            eventOffset +
                             eventRecord.units_offset +
                             uint32_t_size,
                             eventRecordUnitsBuffer.size());
@@ -1620,9 +1609,8 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
         const Category* category = counterDirectory.GetCategory(categoryRecord.name);
         BOOST_CHECK(category);
         BOOST_CHECK(category->m_Name == categoryRecord.name);
-        BOOST_CHECK(category->m_DeviceUid == categoryRecord.device);
-        BOOST_CHECK(category->m_CounterSetUid == categoryRecord.counter_set);
-        BOOST_CHECK(category->m_Counters.size() == categoryRecord.event_count);
+        BOOST_CHECK(category->m_Counters.size() == categoryRecord.event_count + static_cast<size_t>(numberOfCores) -1);
+        BOOST_CHECK(category->m_Counters.size() == categoryRecord.event_count + static_cast<size_t>(numberOfCores) -1);
 
         // Check that the event records are correct
         for (const EventRecord& eventRecord : categoryRecord.event_records)
@@ -1644,8 +1632,6 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest2)
 
 BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest3)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     // Using a mock counter directory that allows to register invalid objects
     MockCounterDirectory counterDirectory;
 
@@ -1658,14 +1644,12 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest3)
 
     // Buffer with enough space
     MockBufferManager mockBuffer(1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockBuffer);
+    SendCounterPacket sendCounterPacket(mockBuffer);
     BOOST_CHECK_THROW(sendCounterPacket.SendCounterDirectoryPacket(counterDirectory), armnn::RuntimeException);
 }
 
 BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest4)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     // Using a mock counter directory that allows to register invalid objects
     MockCounterDirectory counterDirectory;
 
@@ -1678,14 +1662,12 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest4)
 
     // Buffer with enough space
     MockBufferManager mockBuffer(1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockBuffer);
+    SendCounterPacket sendCounterPacket(mockBuffer);
     BOOST_CHECK_THROW(sendCounterPacket.SendCounterDirectoryPacket(counterDirectory), armnn::RuntimeException);
 }
 
 BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest5)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     // Using a mock counter directory that allows to register invalid objects
     MockCounterDirectory counterDirectory;
 
@@ -1698,14 +1680,12 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest5)
 
     // Buffer with enough space
     MockBufferManager mockBuffer(1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockBuffer);
+    SendCounterPacket sendCounterPacket(mockBuffer);
     BOOST_CHECK_THROW(sendCounterPacket.SendCounterDirectoryPacket(counterDirectory), armnn::RuntimeException);
 }
 
 BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest6)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     // Using a mock counter directory that allows to register invalid objects
     MockCounterDirectory counterDirectory;
 
@@ -1726,22 +1706,18 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest6)
     // Register an invalid category associated to an invalid device and an invalid counter set
     const std::string categoryName = "c@t€gory";
     const Category* category = nullptr;
-    BOOST_CHECK_NO_THROW(category = counterDirectory.RegisterCategory(categoryName,
-                                                                      device->m_Uid,
-                                                                      counterSet->m_Uid));
+    BOOST_CHECK_NO_THROW(category = counterDirectory.RegisterCategory(categoryName));
     BOOST_CHECK(counterDirectory.GetCategoryCount() == 1);
     BOOST_CHECK(category);
 
     // Buffer with enough space
     MockBufferManager mockBuffer(1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockBuffer);
+    SendCounterPacket sendCounterPacket(mockBuffer);
     BOOST_CHECK_THROW(sendCounterPacket.SendCounterDirectoryPacket(counterDirectory), armnn::RuntimeException);
 }
 
 BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest7)
 {
-    ProfilingStateMachine profilingStateMachine;
-
     // Using a mock counter directory that allows to register invalid objects
     MockCounterDirectory counterDirectory;
 
@@ -1762,9 +1738,7 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest7)
     // Register a valid category associated to a valid device and a valid counter set
     const std::string categoryName = "category";
     const Category* category = nullptr;
-    BOOST_CHECK_NO_THROW(category = counterDirectory.RegisterCategory(categoryName,
-                                                                      device->m_Uid,
-                                                                      counterSet->m_Uid));
+    BOOST_CHECK_NO_THROW(category = counterDirectory.RegisterCategory(categoryName));
     BOOST_CHECK(counterDirectory.GetCategoryCount() == 1);
     BOOST_CHECK(category);
 
@@ -1787,7 +1761,7 @@ BOOST_AUTO_TEST_CASE(SendCounterDirectoryPacketTest7)
 
     // Buffer with enough space
     MockBufferManager mockBuffer(1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockBuffer);
+    SendCounterPacket sendCounterPacket(mockBuffer);
     BOOST_CHECK_THROW(sendCounterPacket.SendCounterDirectoryPacket(counterDirectory), armnn::RuntimeException);
 }
 
@@ -1798,20 +1772,21 @@ BOOST_AUTO_TEST_CASE(SendThreadTest0)
 
     MockProfilingConnection mockProfilingConnection;
     MockStreamCounterBuffer mockStreamCounterBuffer(0);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockStreamCounterBuffer);
+    SendCounterPacket sendCounterPacket(mockStreamCounterBuffer);
+    SendThread sendThread(profilingStateMachine, mockStreamCounterBuffer, sendCounterPacket);
 
     // Try to start the send thread many times, it must only start once
 
-    sendCounterPacket.Start(mockProfilingConnection);
-    BOOST_CHECK(sendCounterPacket.IsRunning());
-    sendCounterPacket.Start(mockProfilingConnection);
-    sendCounterPacket.Start(mockProfilingConnection);
-    sendCounterPacket.Start(mockProfilingConnection);
-    sendCounterPacket.Start(mockProfilingConnection);
-    BOOST_CHECK(sendCounterPacket.IsRunning());
+    sendThread.Start(mockProfilingConnection);
+    BOOST_CHECK(sendThread.IsRunning());
+    sendThread.Start(mockProfilingConnection);
+    sendThread.Start(mockProfilingConnection);
+    sendThread.Start(mockProfilingConnection);
+    sendThread.Start(mockProfilingConnection);
+    BOOST_CHECK(sendThread.IsRunning());
 
-    sendCounterPacket.Stop();
-    BOOST_CHECK(!sendCounterPacket.IsRunning());
+    sendThread.Stop();
+    BOOST_CHECK(!sendThread.IsRunning());
 }
 
 BOOST_AUTO_TEST_CASE(SendThreadTest1)
@@ -1823,8 +1798,9 @@ BOOST_AUTO_TEST_CASE(SendThreadTest1)
 
     MockProfilingConnection mockProfilingConnection;
     MockStreamCounterBuffer mockStreamCounterBuffer(1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockStreamCounterBuffer);
-    sendCounterPacket.Start(mockProfilingConnection);
+    SendCounterPacket sendCounterPacket(mockStreamCounterBuffer);
+    SendThread sendThread(profilingStateMachine, mockStreamCounterBuffer, sendCounterPacket);
+    sendThread.Start(mockProfilingConnection);
 
     // Interleaving writes and reads to/from the buffer with pauses to test that the send thread actually waits for
     // something to become available for reading
@@ -1834,13 +1810,9 @@ BOOST_AUTO_TEST_CASE(SendThreadTest1)
     CounterDirectory counterDirectory;
     sendCounterPacket.SendStreamMetaDataPacket();
 
-    // Get the size of the Stream Metadata Packet
-    std::string processName = GetProcessName().substr(0, 60);
-    unsigned int processNameSize = processName.empty() ? 0 : boost::numeric_cast<unsigned int>(processName.size()) + 1;
-    unsigned int streamMetadataPacketsize = 118 + processNameSize;
-    totalWrittenSize += streamMetadataPacketsize;
+    totalWrittenSize += GetStreamMetaDataPacketSize();
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
@@ -1850,7 +1822,7 @@ BOOST_AUTO_TEST_CASE(SendThreadTest1)
     unsigned int counterDirectoryPacketSize = 32;
     totalWrittenSize += counterDirectoryPacketSize;
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
@@ -1864,7 +1836,7 @@ BOOST_AUTO_TEST_CASE(SendThreadTest1)
     unsigned int periodicCounterCapturePacketSize = 28;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
@@ -1902,7 +1874,7 @@ BOOST_AUTO_TEST_CASE(SendThreadTest1)
     periodicCounterCapturePacketSize = 40;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
@@ -1912,13 +1884,13 @@ BOOST_AUTO_TEST_CASE(SendThreadTest1)
     periodicCounterCapturePacketSize = 30;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     // To test an exact value of the "read size" in the mock buffer, wait to allow the send thread to
     // read everything remaining in the buffer
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
-    sendCounterPacket.Stop();
+    sendThread.Stop();
 
     BOOST_CHECK(mockStreamCounterBuffer.GetCommittedSize() == totalWrittenSize);
     BOOST_CHECK(mockStreamCounterBuffer.GetReadableSize()  == totalWrittenSize);
@@ -1934,26 +1906,23 @@ BOOST_AUTO_TEST_CASE(SendThreadTest2)
 
     MockProfilingConnection mockProfilingConnection;
     MockStreamCounterBuffer mockStreamCounterBuffer(1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockStreamCounterBuffer);
-    sendCounterPacket.Start(mockProfilingConnection);
+    SendCounterPacket sendCounterPacket(mockStreamCounterBuffer);
+    SendThread sendThread(profilingStateMachine, mockStreamCounterBuffer, sendCounterPacket);
+    sendThread.Start(mockProfilingConnection);
 
     // Adding many spurious "ready to read" signals throughout the test to check that the send thread is
     // capable of handling unnecessary read requests
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     CounterDirectory counterDirectory;
     sendCounterPacket.SendStreamMetaDataPacket();
 
-    // Get the size of the Stream Metadata Packet
-    std::string processName = GetProcessName().substr(0, 60);
-    unsigned int processNameSize = processName.empty() ? 0 : boost::numeric_cast<unsigned int>(processName.size()) + 1;
-    unsigned int streamMetadataPacketsize = 118 + processNameSize;
-    totalWrittenSize += streamMetadataPacketsize;
+    totalWrittenSize += GetStreamMetaDataPacketSize();
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
@@ -1963,8 +1932,8 @@ BOOST_AUTO_TEST_CASE(SendThreadTest2)
     unsigned int counterDirectoryPacketSize = 32;
     totalWrittenSize += counterDirectoryPacketSize;
 
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
@@ -1978,17 +1947,17 @@ BOOST_AUTO_TEST_CASE(SendThreadTest2)
     unsigned int periodicCounterCapturePacketSize = 28;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
     sendCounterPacket.SendPeriodicCounterCapturePacket(44u,
                                                        {
                                                            { 211u,     923u }
@@ -2011,7 +1980,7 @@ BOOST_AUTO_TEST_CASE(SendThreadTest2)
     periodicCounterCapturePacketSize = 46;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
     sendCounterPacket.SendPeriodicCounterCapturePacket(997u,
                                                        {
                                                            {  88u,      11u },
@@ -2024,8 +1993,8 @@ BOOST_AUTO_TEST_CASE(SendThreadTest2)
     periodicCounterCapturePacketSize = 40;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
 
@@ -2035,13 +2004,11 @@ BOOST_AUTO_TEST_CASE(SendThreadTest2)
     periodicCounterCapturePacketSize = 30;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     // To test an exact value of the "read size" in the mock buffer, wait to allow the send thread to
     // read everything remaining in the buffer
-    std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
-
-    sendCounterPacket.Stop();
+    sendThread.Stop();
 
     BOOST_CHECK(mockStreamCounterBuffer.GetCommittedSize() == totalWrittenSize);
     BOOST_CHECK(mockStreamCounterBuffer.GetReadableSize()  == totalWrittenSize);
@@ -2057,31 +2024,28 @@ BOOST_AUTO_TEST_CASE(SendThreadTest3)
 
     MockProfilingConnection mockProfilingConnection;
     MockStreamCounterBuffer mockStreamCounterBuffer(1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, mockStreamCounterBuffer);
-    sendCounterPacket.Start(mockProfilingConnection);
+    SendCounterPacket sendCounterPacket(mockStreamCounterBuffer);
+    SendThread sendThread(profilingStateMachine, mockStreamCounterBuffer, sendCounterPacket);
+    sendThread.Start(mockProfilingConnection);
 
     // Not using pauses or "grace periods" to stress test the send thread
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     CounterDirectory counterDirectory;
     sendCounterPacket.SendStreamMetaDataPacket();
 
-    // Get the size of the Stream Metadata Packet
-    std::string processName = GetProcessName().substr(0, 60);
-    unsigned int processNameSize = processName.empty() ? 0 : boost::numeric_cast<unsigned int>(processName.size()) + 1;
-    unsigned int streamMetadataPacketsize = 118 + processNameSize;
-    totalWrittenSize += streamMetadataPacketsize;
+    totalWrittenSize += GetStreamMetaDataPacketSize();
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
     sendCounterPacket.SendCounterDirectoryPacket(counterDirectory);
 
     // Get the size of the Counter Directory Packet
     unsigned int counterDirectoryPacketSize =32;
     totalWrittenSize += counterDirectoryPacketSize;
 
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
     sendCounterPacket.SendPeriodicCounterCapturePacket(123u,
                                                        {
                                                            {   1u,      23u },
@@ -2092,11 +2056,11 @@ BOOST_AUTO_TEST_CASE(SendThreadTest3)
     unsigned int periodicCounterCapturePacketSize = 28;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
     sendCounterPacket.SendPeriodicCounterCapturePacket(44u,
                                                        {
                                                            { 211u,     923u }
@@ -2119,8 +2083,8 @@ BOOST_AUTO_TEST_CASE(SendThreadTest3)
     periodicCounterCapturePacketSize = 46;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
     sendCounterPacket.SendPeriodicCounterCapturePacket(997u,
                                                        {
                                                            {  88u,      11u },
@@ -2133,19 +2097,19 @@ BOOST_AUTO_TEST_CASE(SendThreadTest3)
     periodicCounterCapturePacketSize = 40;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
+    sendThread.SetReadyToRead();
     sendCounterPacket.SendPeriodicCounterSelectionPacket(1000u, { 1345u, 254u, 4536u, 408u, 54u, 6323u, 428u, 1u, 6u });
 
     // Get the size of the Periodic Counter Capture Packet
     periodicCounterCapturePacketSize = 30;
     totalWrittenSize += periodicCounterCapturePacketSize;
 
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
     // Abruptly terminating the send thread, the amount of data sent may be less than the amount written (the send
     // thread is not guaranteed to flush the buffer)
-    sendCounterPacket.Stop();
+    sendThread.Stop();
 
     BOOST_CHECK(mockStreamCounterBuffer.GetCommittedSize() == totalWrittenSize);
     BOOST_CHECK(mockStreamCounterBuffer.GetReadableSize()  <= totalWrittenSize);
@@ -2154,94 +2118,38 @@ BOOST_AUTO_TEST_CASE(SendThreadTest3)
     BOOST_CHECK(mockStreamCounterBuffer.GetReadSize()      <= mockStreamCounterBuffer.GetCommittedSize());
 }
 
-BOOST_AUTO_TEST_CASE(SendThreadBufferTest)
+BOOST_AUTO_TEST_CASE(SendCounterPacketTestWithSendThread)
 {
     ProfilingStateMachine profilingStateMachine;
-    SetActiveProfilingState(profilingStateMachine);
+    SetWaitingForAckProfilingState(profilingStateMachine);
 
     MockProfilingConnection mockProfilingConnection;
     BufferManager bufferManager(1, 1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, bufferManager, -1);
-    sendCounterPacket.Start(mockProfilingConnection);
+    SendCounterPacket sendCounterPacket(bufferManager);
+    SendThread sendThread(profilingStateMachine, bufferManager, sendCounterPacket, -1);
+    sendThread.Start(mockProfilingConnection);
 
-    // Interleaving writes and reads to/from the buffer with pauses to test that the send thread actually waits for
-    // something to become available for reading
-    std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
+    unsigned int streamMetadataPacketsize = GetStreamMetaDataPacketSize();
 
-    // SendStreamMetaDataPacket
-    sendCounterPacket.SendStreamMetaDataPacket();
+    sendThread.Stop();
 
-    // Read data from the buffer
-    // Buffer should become readable after commit by SendStreamMetaDataPacket
-    auto packetBuffer = bufferManager.GetReadableBuffer();
-    BOOST_TEST(packetBuffer.get());
-
-    std::string processName = GetProcessName().substr(0, 60);
-    unsigned int processNameSize = processName.empty() ? 0 : boost::numeric_cast<unsigned int>(processName.size()) + 1;
-    unsigned int streamMetadataPacketsize = 118 + processNameSize;
-    BOOST_TEST(packetBuffer->GetSize() == streamMetadataPacketsize);
-
-    // Buffer is not available when SendStreamMetaDataPacket already occupied the buffer.
-    unsigned int reservedSize = 0;
-    auto reservedBuffer = bufferManager.Reserve(512, reservedSize);
-    BOOST_TEST(!reservedBuffer.get());
-
-    // Recommit to be read by sendCounterPacket
-    bufferManager.Commit(packetBuffer, streamMetadataPacketsize);
-
-    sendCounterPacket.SetReadyToRead();
-
-    std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
-
-    // The buffer is read by the send thread so it should not be in the readable buffer.
-    auto readBuffer = bufferManager.GetReadableBuffer();
-    BOOST_TEST(!readBuffer);
-
-    // Successfully reserved the buffer with requested size
-    reservedBuffer = bufferManager.Reserve(512, reservedSize);
-    BOOST_TEST(reservedSize == 512);
-    BOOST_TEST(reservedBuffer.get());
+    // check for packet in ProfilingConnection
+    BOOST_CHECK(mockProfilingConnection.CheckForPacket({PacketType::StreamMetaData, streamMetadataPacketsize}) == 1);
 
-    // Release the buffer to be used by sendCounterPacket
-    bufferManager.Release(reservedBuffer);
+    SetActiveProfilingState(profilingStateMachine);
+    sendThread.Start(mockProfilingConnection);
 
     // SendCounterDirectoryPacket
     CounterDirectory counterDirectory;
     sendCounterPacket.SendCounterDirectoryPacket(counterDirectory);
 
-    // Read data from the buffer
-    // Buffer should become readable after commit by SendCounterDirectoryPacket
-    auto counterDirectoryPacketBuffer = bufferManager.GetReadableBuffer();
-    BOOST_TEST(counterDirectoryPacketBuffer.get());
-
-    // Get the size of the Counter Directory Packet
+    sendThread.Stop();
     unsigned int counterDirectoryPacketSize = 32;
-    BOOST_TEST(counterDirectoryPacketBuffer->GetSize() == counterDirectoryPacketSize);
-
-    // Buffer is not available when SendCounterDirectoryPacket already occupied the buffer.
-    reservedSize = 0;
-    reservedBuffer = bufferManager.Reserve(512, reservedSize);
-    BOOST_TEST(reservedSize == 0);
-    BOOST_TEST(!reservedBuffer.get());
-
-    // Recommit to be read by sendCounterPacket
-    bufferManager.Commit(counterDirectoryPacketBuffer, counterDirectoryPacketSize);
-
-    sendCounterPacket.SetReadyToRead();
-
-    std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
+    // check for packet in ProfilingConnection
+    BOOST_CHECK(mockProfilingConnection.CheckForPacket(
+        {PacketType::CounterDirectory, counterDirectoryPacketSize}) == 1);
 
-    // The buffer is read by the send thread so it should not be in the readable buffer.
-    readBuffer = bufferManager.GetReadableBuffer();
-    BOOST_TEST(!readBuffer);
-
-    // Successfully reserved the buffer with requested size
-    reservedBuffer = bufferManager.Reserve(512, reservedSize);
-    BOOST_TEST(reservedSize == 512);
-    BOOST_TEST(reservedBuffer.get());
-
-    // Release the buffer to be used by sendCounterPacket
-    bufferManager.Release(reservedBuffer);
+    sendThread.Start(mockProfilingConnection);
 
     // SendPeriodicCounterCapturePacket
     sendCounterPacket.SendPeriodicCounterCapturePacket(123u,
@@ -2250,49 +2158,23 @@ BOOST_AUTO_TEST_CASE(SendThreadBufferTest)
                                                            {  33u, 1207623u }
                                                        });
 
-    // Read data from the buffer
-    // Buffer should become readable after commit by SendPeriodicCounterCapturePacket
-    auto periodicCounterCapturePacketBuffer = bufferManager.GetReadableBuffer();
-    BOOST_TEST(periodicCounterCapturePacketBuffer.get());
+    sendThread.Stop();
 
-    // Get the size of the Periodic Counter Capture Packet
     unsigned int periodicCounterCapturePacketSize = 28;
-    BOOST_TEST(periodicCounterCapturePacketBuffer->GetSize() == periodicCounterCapturePacketSize);
-
-    // Buffer is not available when SendPeriodicCounterCapturePacket already occupied the buffer.
-    reservedSize = 0;
-    reservedBuffer = bufferManager.Reserve(512, reservedSize);
-    BOOST_TEST(reservedSize == 0);
-    BOOST_TEST(!reservedBuffer.get());
-
-    // Recommit to be read by sendCounterPacket
-    bufferManager.Commit(periodicCounterCapturePacketBuffer, periodicCounterCapturePacketSize);
-
-    sendCounterPacket.SetReadyToRead();
-
-    std::this_thread::sleep_for(std::chrono::milliseconds(WAIT_UNTIL_READABLE_MS));
-
-    // The buffer is read by the send thread so it should not be in the readable buffer.
-    readBuffer = bufferManager.GetReadableBuffer();
-    BOOST_TEST(!readBuffer);
-
-    // Successfully reserved the buffer with requested size
-    reservedBuffer = bufferManager.Reserve(512, reservedSize);
-    BOOST_TEST(reservedSize == 512);
-    BOOST_TEST(reservedBuffer.get());
-
-    sendCounterPacket.Stop();
+    BOOST_CHECK(mockProfilingConnection.CheckForPacket(
+        {PacketType::PeriodicCounterCapture, periodicCounterCapturePacketSize}) == 1);
 }
 
-BOOST_AUTO_TEST_CASE(SendThreadBufferTest1)
+BOOST_AUTO_TEST_CASE(SendThreadBufferTest)
 {
     ProfilingStateMachine profilingStateMachine;
     SetActiveProfilingState(profilingStateMachine);
 
     MockProfilingConnection mockProfilingConnection;
     BufferManager bufferManager(3, 1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, bufferManager, -1);
-    sendCounterPacket.Start(mockProfilingConnection);
+    SendCounterPacket sendCounterPacket(bufferManager);
+    SendThread sendThread(profilingStateMachine, bufferManager, sendCounterPacket, -1);
+    sendThread.Start(mockProfilingConnection);
 
     // SendStreamMetaDataPacket
     sendCounterPacket.SendStreamMetaDataPacket();
@@ -2302,22 +2184,16 @@ BOOST_AUTO_TEST_CASE(SendThreadBufferTest1)
     auto packetBuffer = bufferManager.GetReadableBuffer();
     BOOST_TEST(packetBuffer.get());
 
-    std::string processName = GetProcessName().substr(0, 60);
-    unsigned int processNameSize = processName.empty() ? 0 : boost::numeric_cast<unsigned int>(processName.size()) + 1;
-    unsigned int streamMetadataPacketsize = 118 + processNameSize;
+    unsigned int streamMetadataPacketsize = GetStreamMetaDataPacketSize();
     BOOST_TEST(packetBuffer->GetSize() == streamMetadataPacketsize);
 
     // Recommit to be read by sendCounterPacket
     bufferManager.Commit(packetBuffer, streamMetadataPacketsize);
 
-    sendCounterPacket.SetReadyToRead();
-
     // SendCounterDirectoryPacket
     CounterDirectory counterDirectory;
     sendCounterPacket.SendCounterDirectoryPacket(counterDirectory);
 
-    sendCounterPacket.SetReadyToRead();
-
     // SendPeriodicCounterCapturePacket
     sendCounterPacket.SendPeriodicCounterCapturePacket(123u,
                                                        {
@@ -2325,9 +2201,7 @@ BOOST_AUTO_TEST_CASE(SendThreadBufferTest1)
                                                            {  33u, 1207623u }
                                                        });
 
-    sendCounterPacket.SetReadyToRead();
-
-    sendCounterPacket.Stop();
+    sendThread.Stop();
 
     // The buffer is read by the send thread so it should not be in the readable buffer.
     auto readBuffer = bufferManager.GetReadableBuffer();
@@ -2339,16 +2213,15 @@ BOOST_AUTO_TEST_CASE(SendThreadBufferTest1)
     BOOST_TEST(reservedSize == 512);
     BOOST_TEST(reservedBuffer.get());
 
-    // Check that data was actually written to the profiling connection in any order
-    const std::vector<uint32_t> writtenData = mockProfilingConnection.GetWrittenData();
-    BOOST_TEST(writtenData.size() == 3);
-    bool foundStreamMetaDataPacket =
-        std::find(writtenData.begin(), writtenData.end(), streamMetadataPacketsize) != writtenData.end();
-    bool foundCounterDirectoryPacket = std::find(writtenData.begin(), writtenData.end(), 32) != writtenData.end();
-    bool foundPeriodicCounterCapturePacket = std::find(writtenData.begin(), writtenData.end(), 28) != writtenData.end();
-    BOOST_TEST(foundStreamMetaDataPacket);
-    BOOST_TEST(foundCounterDirectoryPacket);
-    BOOST_TEST(foundPeriodicCounterCapturePacket);
+    const auto writtenDataSize = mockProfilingConnection.GetWrittenDataSize();
+    const auto metaDataPacketCount =
+            mockProfilingConnection.CheckForPacket({PacketType::StreamMetaData, streamMetadataPacketsize});
+
+    BOOST_TEST(metaDataPacketCount >= 1);
+    BOOST_TEST(mockProfilingConnection.CheckForPacket({PacketType::CounterDirectory, 32}) == 1);
+    BOOST_TEST(mockProfilingConnection.CheckForPacket({PacketType::PeriodicCounterCapture, 28}) == 1);
+    // Check that we only received the packets we expected
+    BOOST_TEST(metaDataPacketCount + 2 == writtenDataSize);
 }
 
 BOOST_AUTO_TEST_CASE(SendThreadSendStreamMetadataPacket1)
@@ -2357,11 +2230,12 @@ BOOST_AUTO_TEST_CASE(SendThreadSendStreamMetadataPacket1)
 
     MockProfilingConnection mockProfilingConnection;
     BufferManager bufferManager(3, 1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, bufferManager);
-    sendCounterPacket.Start(mockProfilingConnection);
+    SendCounterPacket sendCounterPacket(bufferManager);
+    SendThread sendThread(profilingStateMachine, bufferManager, sendCounterPacket);
+    sendThread.Start(mockProfilingConnection);
 
     // The profiling state is set to "Uninitialized", so the send thread should throw an exception
-    BOOST_CHECK_THROW(sendCounterPacket.Stop(), armnn::RuntimeException);
+    BOOST_CHECK_THROW(sendThread.Stop(), armnn::RuntimeException);
 }
 
 BOOST_AUTO_TEST_CASE(SendThreadSendStreamMetadataPacket2)
@@ -2371,11 +2245,12 @@ BOOST_AUTO_TEST_CASE(SendThreadSendStreamMetadataPacket2)
 
     MockProfilingConnection mockProfilingConnection;
     BufferManager bufferManager(3, 1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, bufferManager);
-    sendCounterPacket.Start(mockProfilingConnection);
+    SendCounterPacket sendCounterPacket(bufferManager);
+    SendThread sendThread(profilingStateMachine, bufferManager, sendCounterPacket);
+    sendThread.Start(mockProfilingConnection);
 
     // The profiling state is set to "NotConnected", so the send thread should throw an exception
-    BOOST_CHECK_THROW(sendCounterPacket.Stop(), armnn::RuntimeException);
+    BOOST_CHECK_THROW(sendThread.Stop(), armnn::RuntimeException);
 }
 
 BOOST_AUTO_TEST_CASE(SendThreadSendStreamMetadataPacket3)
@@ -2383,24 +2258,24 @@ BOOST_AUTO_TEST_CASE(SendThreadSendStreamMetadataPacket3)
     ProfilingStateMachine profilingStateMachine;
     SetWaitingForAckProfilingState(profilingStateMachine);
 
-    // Calculate the size of a Stream Metadata packet
-    std::string processName = GetProcessName().substr(0, 60);
-    unsigned int processNameSize = processName.empty() ? 0 : boost::numeric_cast<unsigned int>(processName.size()) + 1;
-    unsigned int streamMetadataPacketsize = 118 + processNameSize;
+    unsigned int streamMetadataPacketsize = GetStreamMetaDataPacketSize();
 
     MockProfilingConnection mockProfilingConnection;
     BufferManager bufferManager(3, 1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, bufferManager);
-    sendCounterPacket.Start(mockProfilingConnection);
+    SendCounterPacket sendCounterPacket(bufferManager);
+    SendThread sendThread(profilingStateMachine, bufferManager, sendCounterPacket);
+    sendThread.Start(mockProfilingConnection);
 
     // The profiling state is set to "WaitingForAck", so the send thread should send a Stream Metadata packet
-    // Wait for sendCounterPacket to join
-    BOOST_CHECK_NO_THROW(sendCounterPacket.Stop());
+    // Wait for sendThread to join
+    BOOST_CHECK_NO_THROW(sendThread.Stop());
 
-    // Check that the buffer contains at least one Stream Metadata packet
-    const std::vector<uint32_t> writtenData = mockProfilingConnection.GetWrittenData();
-    BOOST_TEST(writtenData.size() >= 1);
-    BOOST_TEST(writtenData[0] == streamMetadataPacketsize);
+    // Check that the buffer contains at least one Stream Metadata packet and no other packets
+    const auto writtenDataSize = mockProfilingConnection.GetWrittenDataSize();
+
+    BOOST_TEST(writtenDataSize >= 1);
+    BOOST_TEST(mockProfilingConnection.CheckForPacket(
+                  {PacketType::StreamMetaData, streamMetadataPacketsize}) == writtenDataSize);
 }
 
 BOOST_AUTO_TEST_CASE(SendThreadSendStreamMetadataPacket4)
@@ -2408,43 +2283,45 @@ BOOST_AUTO_TEST_CASE(SendThreadSendStreamMetadataPacket4)
     ProfilingStateMachine profilingStateMachine;
     SetWaitingForAckProfilingState(profilingStateMachine);
 
-    // Calculate the size of a Stream Metadata packet
-    std::string processName = GetProcessName().substr(0, 60);
-    unsigned int processNameSize = processName.empty() ? 0 : boost::numeric_cast<unsigned int>(processName.size()) + 1;
-    unsigned int streamMetadataPacketsize = 118 + processNameSize;
+    unsigned int streamMetadataPacketsize = GetStreamMetaDataPacketSize();
 
     MockProfilingConnection mockProfilingConnection;
     BufferManager bufferManager(3, 1024);
-    SendCounterPacket sendCounterPacket(profilingStateMachine, bufferManager);
-    sendCounterPacket.Start(mockProfilingConnection);
+    SendCounterPacket sendCounterPacket(bufferManager);
+    SendThread sendThread(profilingStateMachine, bufferManager, sendCounterPacket);
+    sendThread.Start(mockProfilingConnection);
 
     // The profiling state is set to "WaitingForAck", so the send thread should send a Stream Metadata packet
-    // Wait for sendCounterPacket to join
-    sendCounterPacket.Stop();
+    // Wait for sendThread to join
+    sendThread.Stop();
 
-    sendCounterPacket.Start(mockProfilingConnection);
+    sendThread.Start(mockProfilingConnection);
     // Check that the profiling state is still "WaitingForAck"
     BOOST_TEST((profilingStateMachine.GetCurrentState() == ProfilingState::WaitingForAck));
 
     // Check that the buffer contains at least one Stream Metadata packet
-    const std::vector<uint32_t> writtenData = mockProfilingConnection.GetWrittenData();
-    BOOST_TEST(writtenData.size() >= 1);
-    BOOST_TEST(writtenData[0] == streamMetadataPacketsize);
+    BOOST_TEST(mockProfilingConnection.CheckForPacket({PacketType::StreamMetaData, streamMetadataPacketsize}) >= 1);
 
     mockProfilingConnection.Clear();
 
+    sendThread.Stop();
+    sendThread.Start(mockProfilingConnection);
+
     // Try triggering a new buffer read
-    sendCounterPacket.SetReadyToRead();
+    sendThread.SetReadyToRead();
 
-    // Wait for sendCounterPacket to join
-    BOOST_CHECK_NO_THROW(sendCounterPacket.Stop());
+    // Wait for sendThread to join
+    BOOST_CHECK_NO_THROW(sendThread.Stop());
 
     // Check that the profiling state is still "WaitingForAck"
     BOOST_TEST((profilingStateMachine.GetCurrentState() == ProfilingState::WaitingForAck));
 
-    // Check that the buffer contains at least one Stream Metadata packet
-    BOOST_TEST(writtenData.size() >= 1);
-    BOOST_TEST(writtenData[0] == streamMetadataPacketsize);
+    // Check that the buffer contains at least one Stream Metadata packet and no other packets
+    const auto writtenDataSize = mockProfilingConnection.GetWrittenDataSize();
+
+    BOOST_TEST(writtenDataSize >= 1);
+    BOOST_TEST(mockProfilingConnection.CheckForPacket(
+                  {PacketType::StreamMetaData, streamMetadataPacketsize}) == writtenDataSize);
 }
 
 BOOST_AUTO_TEST_SUITE_END()