From f341d7a4091a6ec1c1a4a49c4810d4a8b036ad60 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Tue, 7 Mar 2023 10:12:14 -0800 Subject: [PATCH] [lldb] Make MemoryCache::Read more resilient MemoryCache::Read is not resilient to partial reads when reading memory chunks less than or equal in size to L2 cache lines. There have been attempts in the past to fix this but nothing really solved the root of the issue. I first created a test exercising MemoryCache's implementation and documenting how I believe MemoryCache::Read should behave. I then rewrote the implementation of MemoryCache::Read as needed to make sure that the different scenarios behaved correctly. rdar://105407095 Differential Revision: https://reviews.llvm.org/D145624 --- lldb/include/lldb/Target/Memory.h | 2 + lldb/source/Target/Memory.cpp | 202 ++++++++++++++++--------------- lldb/unittests/Target/CMakeLists.txt | 2 + lldb/unittests/Target/MemoryTest.cpp | 228 +++++++++++++++++++++++++++++++++++ 4 files changed, 340 insertions(+), 94 deletions(-) create mode 100644 lldb/unittests/Target/MemoryTest.cpp diff --git a/lldb/include/lldb/Target/Memory.h b/lldb/include/lldb/Target/Memory.h index 8c564e8..2d1489a 100644 --- a/lldb/include/lldb/Target/Memory.h +++ b/lldb/include/lldb/Target/Memory.h @@ -61,6 +61,8 @@ protected: private: MemoryCache(const MemoryCache &) = delete; const MemoryCache &operator=(const MemoryCache &) = delete; + + lldb::DataBufferSP GetL2CacheLine(lldb::addr_t addr, Status &error); }; diff --git a/lldb/source/Target/Memory.cpp b/lldb/source/Target/Memory.cpp index fdc0138..45786415 100644 --- a/lldb/source/Target/Memory.cpp +++ b/lldb/source/Target/Memory.cpp @@ -123,18 +123,55 @@ bool MemoryCache::RemoveInvalidRange(lldb::addr_t base_addr, return false; } +lldb::DataBufferSP MemoryCache::GetL2CacheLine(lldb::addr_t line_base_addr, + Status &error) { + // This function assumes that the address given is aligned correctly. 
+ assert((line_base_addr % m_L2_cache_line_byte_size) == 0); + + std::lock_guard<std::recursive_mutex> guard(m_mutex); + auto pos = m_L2_cache.find(line_base_addr); + if (pos != m_L2_cache.end()) + return pos->second; + + auto data_buffer_heap_sp = + std::make_shared<DataBufferHeap>(m_L2_cache_line_byte_size, 0); + size_t process_bytes_read = m_process.ReadMemoryFromInferior( + line_base_addr, data_buffer_heap_sp->GetBytes(), + data_buffer_heap_sp->GetByteSize(), error); + + // If we failed a read, not much we can do. + if (process_bytes_read == 0) + return lldb::DataBufferSP(); + + // If we didn't get a complete read, we can still cache what we did get. + if (process_bytes_read < m_L2_cache_line_byte_size) + data_buffer_heap_sp->SetByteSize(process_bytes_read); + + m_L2_cache[line_base_addr] = data_buffer_heap_sp; + return data_buffer_heap_sp; +} + size_t MemoryCache::Read(addr_t addr, void *dst, size_t dst_len, Status &error) { - size_t bytes_left = dst_len; - - // Check the L1 cache for a range that contain the entire memory read. If we - // find a range in the L1 cache that does, we use it. Else we fall back to - // reading memory in m_L2_cache_line_byte_size byte sized chunks. The L1 - // cache contains chunks of memory that are not required to be - // m_L2_cache_line_byte_size bytes in size, so we don't try anything tricky - // when reading from them (no partial reads from the L1 cache). + if (!dst || dst_len == 0) + return 0; std::lock_guard<std::recursive_mutex> guard(m_mutex); + // FIXME: We should do a more thorough check to make sure that we're not + // overlapping with any invalid ranges (e.g. Read 0x100 - 0x200 but there's an + // invalid range 0x180 - 0x280). `FindEntryThatContains` has an implementation + // that takes a range, but it only checks to see if the argument is contained + // by an existing invalid range. It cannot check if the argument contains + // invalid ranges and cannot check for overlaps. 
+ if (m_invalid_ranges.FindEntryThatContains(addr)) { + error.SetErrorStringWithFormat("memory read failed for 0x%" PRIx64, addr); + return 0; + } + + // Check the L1 cache for a range that contains the entire memory read. + // L1 cache contains chunks of memory that are not required to be the size of + // an L2 cache line. We avoid trying to do partial reads from the L1 cache to + // simplify the implementation. if (!m_L1_cache.empty()) { AddrRange read_range(addr, dst_len); BlockMap::iterator pos = m_L1_cache.upper_bound(addr); @@ -149,105 +186,82 @@ size_t MemoryCache::Read(addr_t addr, void *dst, size_t dst_len, } } - // If this memory read request is larger than the cache line size, then we - // (1) try to read as much of it at once as possible, and (2) don't add the - // data to the memory cache. We don't want to split a big read up into more - // separate reads than necessary, and with a large memory read request, it is - // unlikely that the caller function will ask for the next - // 4 bytes after the large memory read - so there's little benefit to saving - // it in the cache. - if (dst && dst_len > m_L2_cache_line_byte_size) { + // If the size of the read is greater than the size of an L2 cache line, we'll + // just read from the inferior. If that read is successful, we'll cache what + // we read in the L1 cache for future use. 
+ if (dst_len > m_L2_cache_line_byte_size) { size_t bytes_read = m_process.ReadMemoryFromInferior(addr, dst, dst_len, error); - // Add this non block sized range to the L1 cache if we actually read - // anything if (bytes_read > 0) AddL1CacheData(addr, dst, bytes_read); return bytes_read; } - if (dst && bytes_left > 0) { - const uint32_t cache_line_byte_size = m_L2_cache_line_byte_size; - uint8_t *dst_buf = (uint8_t *)dst; - addr_t curr_addr = addr - (addr % cache_line_byte_size); - addr_t cache_offset = addr - curr_addr; - - while (bytes_left > 0) { - if (m_invalid_ranges.FindEntryThatContains(curr_addr)) { - error.SetErrorStringWithFormat("memory read failed for 0x%" PRIx64, - curr_addr); - return dst_len - bytes_left; - } - - BlockMap::const_iterator pos = m_L2_cache.find(curr_addr); - BlockMap::const_iterator end = m_L2_cache.end(); - - if (pos != end) { - size_t curr_read_size = cache_line_byte_size - cache_offset; - if (curr_read_size > bytes_left) - curr_read_size = bytes_left; - - memcpy(dst_buf + dst_len - bytes_left, - pos->second->GetBytes() + cache_offset, curr_read_size); - - bytes_left -= curr_read_size; - curr_addr += curr_read_size + cache_offset; - cache_offset = 0; - - if (bytes_left > 0) { - // Get sequential cache page hits - for (++pos; (pos != end) && (bytes_left > 0); ++pos) { - assert((curr_addr % cache_line_byte_size) == 0); - - if (pos->first != curr_addr) - break; - - curr_read_size = pos->second->GetByteSize(); - if (curr_read_size > bytes_left) - curr_read_size = bytes_left; + // If the size of the read fits inside one L2 cache line, we'll try reading + // from the L2 cache. Note that if the range of memory we're reading sits + // between two contiguous cache lines, we'll touch two cache lines instead of + // just one. + + // We're going to have all of our loads and reads be cache line aligned. 
+ addr_t cache_line_offset = addr % m_L2_cache_line_byte_size; + addr_t cache_line_base_addr = addr - cache_line_offset; + DataBufferSP first_cache_line = GetL2CacheLine(cache_line_base_addr, error); + // If we get nothing, then the read to the inferior likely failed. Nothing to + // do here. + if (!first_cache_line) + return 0; + + // If the cache line was not filled out completely and the offset is greater + // than what we have available, we can't do anything further here. + if (cache_line_offset >= first_cache_line->GetByteSize()) + return 0; + + uint8_t *dst_buf = (uint8_t *)dst; + size_t bytes_left = dst_len; + size_t read_size = first_cache_line->GetByteSize() - cache_line_offset; + if (read_size > bytes_left) + read_size = bytes_left; + + memcpy(dst_buf + dst_len - bytes_left, + first_cache_line->GetBytes() + cache_line_offset, read_size); + bytes_left -= read_size; + + // If the cache line was not filled out completely and we still have data to + // read, we can't do anything further. + if (first_cache_line->GetByteSize() < m_L2_cache_line_byte_size && + bytes_left > 0) + return dst_len - bytes_left; + + // We'll hit this scenario if our read straddles two cache lines. + if (bytes_left > 0) { + cache_line_base_addr += m_L2_cache_line_byte_size; + + // FIXME: Until we are able to more thoroughly check for invalid ranges, we + // will have to check the second line to see if it is in an invalid range as + // well. See the check near the beginning of the function for more details. 
+ if (m_invalid_ranges.FindEntryThatContains(cache_line_base_addr)) { + error.SetErrorStringWithFormat("memory read failed for 0x%" PRIx64, + cache_line_base_addr); + return dst_len - bytes_left; + } - memcpy(dst_buf + dst_len - bytes_left, pos->second->GetBytes(), - curr_read_size); + DataBufferSP second_cache_line = + GetL2CacheLine(cache_line_base_addr, error); + if (!second_cache_line) + return dst_len - bytes_left; - bytes_left -= curr_read_size; - curr_addr += curr_read_size; + read_size = bytes_left; + if (read_size > second_cache_line->GetByteSize()) + read_size = second_cache_line->GetByteSize(); - // We have a cache page that succeeded to read some bytes but not - // an entire page. If this happens, we must cap off how much data - // we are able to read... - if (pos->second->GetByteSize() != cache_line_byte_size) - return dst_len - bytes_left; - } - } - } + memcpy(dst_buf + dst_len - bytes_left, second_cache_line->GetBytes(), + read_size); + bytes_left -= read_size; - // We need to read from the process - - if (bytes_left > 0) { - assert((curr_addr % cache_line_byte_size) == 0); - std::unique_ptr data_buffer_heap_up( - new DataBufferHeap(cache_line_byte_size, 0)); - size_t process_bytes_read = m_process.ReadMemoryFromInferior( - curr_addr, data_buffer_heap_up->GetBytes(), - data_buffer_heap_up->GetByteSize(), error); - if (process_bytes_read == 0) - return dst_len - bytes_left; - - if (process_bytes_read != cache_line_byte_size) { - data_buffer_heap_up->SetByteSize(process_bytes_read); - if (process_bytes_read < data_buffer_heap_up->GetByteSize()) { - dst_len -= data_buffer_heap_up->GetByteSize() - process_bytes_read; - bytes_left = process_bytes_read; - } - } - m_L2_cache[curr_addr] = DataBufferSP(data_buffer_heap_up.release()); - // We have read data and put it into the cache, continue through the - // loop again to get the data out of the cache... 
- } - } + return dst_len - bytes_left; } - return dst_len - bytes_left; + return dst_len; } AllocatedBlock::AllocatedBlock(lldb::addr_t addr, uint32_t byte_size, diff --git a/lldb/unittests/Target/CMakeLists.txt b/lldb/unittests/Target/CMakeLists.txt index c16f0fc..772d584 100644 --- a/lldb/unittests/Target/CMakeLists.txt +++ b/lldb/unittests/Target/CMakeLists.txt @@ -3,6 +3,7 @@ add_lldb_unittest(TargetTests DynamicRegisterInfoTest.cpp ExecutionContextTest.cpp MemoryRegionInfoTest.cpp + MemoryTest.cpp MemoryTagMapTest.cpp ModuleCacheTest.cpp PathMappingListTest.cpp @@ -15,6 +16,7 @@ add_lldb_unittest(TargetTests lldbHost lldbPluginObjectFileELF lldbPluginPlatformLinux + lldbPluginPlatformMacOSX lldbPluginSymbolFileSymtab lldbTarget lldbSymbol diff --git a/lldb/unittests/Target/MemoryTest.cpp b/lldb/unittests/Target/MemoryTest.cpp new file mode 100644 index 0000000..4d7c4d5 --- /dev/null +++ b/lldb/unittests/Target/MemoryTest.cpp @@ -0,0 +1,228 @@ +//===-- MemoryTest.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Target/Memory.h" +#include "Plugins/Platform/MacOSX/PlatformMacOSX.h" +#include "Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h" +#include "lldb/Core/Debugger.h" +#include "lldb/Host/FileSystem.h" +#include "lldb/Host/HostInfo.h" +#include "lldb/Target/Process.h" +#include "lldb/Target/Target.h" +#include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/DataBufferHeap.h" +#include "gtest/gtest.h" + +using namespace lldb_private; +using namespace lldb_private::repro; +using namespace lldb; + +namespace { +class MemoryTest : public ::testing::Test { +public: + void SetUp() override { + FileSystem::Initialize(); + HostInfo::Initialize(); + PlatformMacOSX::Initialize(); + } + void TearDown() override { + PlatformMacOSX::Terminate(); + HostInfo::Terminate(); + FileSystem::Terminate(); + } +}; + +class DummyProcess : public Process { +public: + DummyProcess(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp) + : Process(target_sp, listener_sp), m_bytes_left(0) {} + + // Required overrides + bool CanDebug(lldb::TargetSP target, bool plugin_specified_by_name) override { + return true; + } + Status DoDestroy() override { return {}; } + void RefreshStateAfterStop() override {} + size_t DoReadMemory(lldb::addr_t vm_addr, void *buf, size_t size, + Status &error) override { + if (m_bytes_left == 0) + return 0; + + size_t num_bytes_to_write = size; + if (m_bytes_left < size) { + num_bytes_to_write = m_bytes_left; + m_bytes_left = 0; + } else { + m_bytes_left -= size; + } + + memset(buf, 'B', num_bytes_to_write); + return num_bytes_to_write; + } + bool DoUpdateThreadList(ThreadList &old_thread_list, + ThreadList &new_thread_list) override { + return false; + } + llvm::StringRef GetPluginName() override { return "Dummy"; } + + // Test-specific additions + size_t m_bytes_left; + MemoryCache &GetMemoryCache() { 
return m_memory_cache; } + void SetMaxReadSize(size_t size) { m_bytes_left = size; } +}; +} // namespace + +TargetSP CreateTarget(DebuggerSP &debugger_sp, ArchSpec &arch) { + PlatformSP platform_sp; + TargetSP target_sp; + debugger_sp->GetTargetList().CreateTarget( + *debugger_sp, "", arch, eLoadDependentsNo, platform_sp, target_sp); + return target_sp; +} + +TEST_F(MemoryTest, TestMemoryCacheRead) { + ArchSpec arch("x86_64-apple-macosx-"); + + Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); + + DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + + TargetSP target_sp = CreateTarget(debugger_sp, arch); + ASSERT_TRUE(target_sp); + + ListenerSP listener_sp(Listener::MakeListener("dummy")); + ProcessSP process_sp = std::make_shared<DummyProcess>(target_sp, listener_sp); + ASSERT_TRUE(process_sp); + + DummyProcess *process = static_cast<DummyProcess *>(process_sp.get()); + MemoryCache &mem_cache = process->GetMemoryCache(); + const uint64_t l2_cache_size = process->GetMemoryCacheLineSize(); + Status error; + auto data_sp = std::make_shared<DataBufferHeap>(l2_cache_size * 2, '\0'); + size_t bytes_read = 0; + + // Cache empty, memory read fails, size > l2 cache size + process->SetMaxReadSize(0); + bytes_read = mem_cache.Read(0x1000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == 0); + + // Cache empty, memory read fails, size <= l2 cache size + data_sp->SetByteSize(l2_cache_size); + bytes_read = mem_cache.Read(0x1000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == 0); + + // Cache empty, memory read succeeds, size > l2 cache size + process->SetMaxReadSize(l2_cache_size * 4); + data_sp->SetByteSize(l2_cache_size * 2); + bytes_read = mem_cache.Read(0x1000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == data_sp->GetByteSize()); + ASSERT_TRUE(process->m_bytes_left == l2_cache_size * 2); + + // Reading data previously cached (not in L2 cache). 
+ data_sp->SetByteSize(l2_cache_size + 1); + bytes_read = mem_cache.Read(0x1000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == data_sp->GetByteSize()); + ASSERT_TRUE(process->m_bytes_left == l2_cache_size * 2); // Verify we didn't + // read from the + // inferior. + + // Read from a different address, but make the size == l2 cache size. + // This should fill in the L2 cache. + data_sp->SetByteSize(l2_cache_size); + bytes_read = mem_cache.Read(0x2000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == data_sp->GetByteSize()); + ASSERT_TRUE(process->m_bytes_left == l2_cache_size); + + // Read from that L2 cache entry but read less than size of the cache line. + // Additionally, read from an offset. + data_sp->SetByteSize(l2_cache_size - 5); + bytes_read = mem_cache.Read(0x2001, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == data_sp->GetByteSize()); + ASSERT_TRUE(process->m_bytes_left == l2_cache_size); // Verify we didn't read + // from the inferior. + + // What happens if we try to populate an L2 cache line but the read gives less + // than the size of a cache line? + process->SetMaxReadSize(l2_cache_size - 10); + data_sp->SetByteSize(l2_cache_size - 5); + bytes_read = mem_cache.Read(0x3000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == l2_cache_size - 10); + ASSERT_TRUE(process->m_bytes_left == 0); + + // What happens if we have a partial L2 cache line filled in and we try to + // read the part that isn't filled in? + data_sp->SetByteSize(10); + bytes_read = mem_cache.Read(0x3000 + l2_cache_size - 10, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == 0); // The last 10 bytes from this line are + // missing and we should be reading nothing + // here. + + // What happens when we try to straddle 2 cache lines? 
+ process->SetMaxReadSize(l2_cache_size * 2); + data_sp->SetByteSize(l2_cache_size); + bytes_read = mem_cache.Read(0x4001, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == l2_cache_size); + ASSERT_TRUE(process->m_bytes_left == 0); + + // What happens when we try to straddle 2 cache lines where the first one is + // only partially filled? + process->SetMaxReadSize(l2_cache_size - 1); + data_sp->SetByteSize(l2_cache_size); + bytes_read = mem_cache.Read(0x5005, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == l2_cache_size - 6); // Ignoring the first 5 bytes, + // missing the last byte + ASSERT_TRUE(process->m_bytes_left == 0); + + // What happens if we add an invalid range and try to do a read larger than + // a cache line? + mem_cache.AddInvalidRange(0x6000, l2_cache_size * 2); + process->SetMaxReadSize(l2_cache_size * 2); + data_sp->SetByteSize(l2_cache_size * 2); + bytes_read = mem_cache.Read(0x6000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == 0); + ASSERT_TRUE(process->m_bytes_left == l2_cache_size * 2); + + // What happens if we add an invalid range and try to do a read lt/eq a + // cache line? + mem_cache.AddInvalidRange(0x7000, l2_cache_size); + process->SetMaxReadSize(l2_cache_size); + data_sp->SetByteSize(l2_cache_size); + bytes_read = mem_cache.Read(0x7000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == 0); + ASSERT_TRUE(process->m_bytes_left == l2_cache_size); + + // What happens if we remove the invalid range and read again? + mem_cache.RemoveInvalidRange(0x7000, l2_cache_size); + bytes_read = mem_cache.Read(0x7000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == l2_cache_size); + ASSERT_TRUE(process->m_bytes_left == 0); + + // What happens if we flush and read again? 
+ process->SetMaxReadSize(l2_cache_size * 2); + mem_cache.Flush(0x7000, l2_cache_size); + bytes_read = mem_cache.Read(0x7000, data_sp->GetBytes(), + data_sp->GetByteSize(), error); + ASSERT_TRUE(bytes_read == l2_cache_size); + ASSERT_TRUE(process->m_bytes_left == l2_cache_size); // Verify that we re-read + // instead of using an + // old cache +} -- 2.7.4