Fix regex & startsWith name lookup in SBTarget::FindGlobalVariables
authorJim Ingham <jingham@apple.com>
Thu, 1 Jun 2023 23:12:52 +0000 (16:12 -0700)
committerJim Ingham <jingham@apple.com>
Thu, 1 Jun 2023 23:15:06 +0000 (16:15 -0700)
There were two bugs here.

eMatchTypeStartsWith searched for "symbol_name" by adding ".*" to the
end of the symbol name and treating that as a regex, which isn't
actually a regex for "starts with". The ".*" is in fact a no-op.  When
we finally get to comparing the name, we compare against whatever form
of the name was in the accelerator table. But for C++ that might be
the mangled name. We should also try demangled names here, since most
users are going the see demangled not mangled names.  I fixed these
two bugs and added a bunch of tests for FindGlobalVariables.

This change is in the DWARF parser code, so there may be a similar bug
in PDB, but the test for this was already skipped for Windows, so I
don't know about this.

You might theoretically need to do this Mangled comparison in

DWARFMappedHash::MemoryTable::FindByName

except when we have names we always chop them before looking them up
so I couldn't see any code paths that fail without that change. So I
didn't add that to this patch.

Differential Revision: https://reviews.llvm.org/D151940

lldb/source/API/SBTarget.cpp
lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp
lldb/test/API/lang/cpp/class_static/TestStaticVariables.py
lldb/test/API/lang/cpp/class_static/main.cpp

index 980cb77..53af5b1 100644 (file)
@@ -1892,7 +1892,7 @@ SBValueList SBTarget::FindGlobalVariables(const char *name,
                                                  max_matches, variable_list);
       break;
     case eMatchTypeStartsWith:
-      regexstr = llvm::Regex::escape(name) + ".*";
+      regexstr = "^" + llvm::Regex::escape(name) + ".*";
       target_sp->GetImages().FindGlobalVariables(RegularExpression(regexstr),
                                                  max_matches, variable_list);
       break;
index f530993..9b1497d 100644 (file)
@@ -9,6 +9,8 @@
 #include "HashedNameToDIE.h"
 #include "llvm/ADT/StringRef.h"
 
+#include "lldb/Core/Mangled.h"
+
 using namespace lldb_private::dwarf;
 
 bool DWARFMappedHash::ExtractDIEArray(
@@ -423,7 +425,11 @@ DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression(
       count * m_header.header_data.GetMinimumHashDataByteSize();
   if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
                                                    min_total_hash_data_size)) {
-    const bool match = regex.Execute(llvm::StringRef(strp_cstr));
+    // The name in the name table may be a mangled name, in which case we
+    // should also compare against the demangled version.  The simplest way to
+    // do that is to use the Mangled class:
+    lldb_private::Mangled mangled_name((llvm::StringRef(strp_cstr)));
+    const bool match = mangled_name.NameMatches(regex);
 
     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
       // If the regex doesn't match and we have fixed size data, we can just
index 05c4514..6fd4a8c 100644 (file)
@@ -106,6 +106,20 @@ class StaticVariableTestCase(TestBase):
             ],
         )
 
+    def build_value_check(self, var_name, values):
+        children_1 = [ValueCheck(name = "x", value = values[0], type = "int"),
+                      ValueCheck(name = "y", value = values[1], type = "int")]
+        children_2 = [ValueCheck(name = "x", value = values[2], type = "int"),
+                      ValueCheck(name = "y", value = values[3], type = "int")]
+        elem_0 = ValueCheck(name = "[0]", value=None, type = "PointType",
+                            children=children_1)
+        elem_1 = ValueCheck(name = "[1]", value=None, type = "PointType",
+                            children=children_2)
+        value_check = ValueCheck(name=var_name, value = None, type = "PointType[2]",
+                                 children = [elem_0, elem_1])
+
+        return value_check
+
     @expectedFailureAll(
         compiler=["gcc"], bugnumber="Compiler emits incomplete debug info"
     )
@@ -142,27 +156,30 @@ class StaticVariableTestCase(TestBase):
         # in_scope_only => False
         valList = frame.GetVariables(False, False, True, False)
 
-        for val in valList:
+        # Build ValueCheckers for the values we're going to find:
+        value_check_A = self.build_value_check("A::g_points", ["1", "2", "11", "22"])
+        value_check_none = self.build_value_check("g_points", ["3", "4", "33", "44"])
+        value_check_AA = self.build_value_check("AA::g_points", ["5", "6", "55", "66"])
+
+        for val in valList: 
             self.DebugSBValue(val)
             name = val.GetName()
-            self.assertIn(name, ["g_points", "A::g_points"])
+            self.assertIn(name, ["g_points", "A::g_points", "AA::g_points"])
+
+            if name == "A::g_points":
+                self.assertEqual(val.GetValueType(), lldb.eValueTypeVariableGlobal)
+                value_check_A.check_value(self, val, "Got A::g_points right")
             if name == "g_points":
                 self.assertEqual(val.GetValueType(), lldb.eValueTypeVariableStatic)
-                self.assertEqual(val.GetNumChildren(), 2)
-            elif name == "A::g_points":
+                value_check_none.check_value(self, val, "Got g_points right")
+            if name == "AA::g_points":
                 self.assertEqual(val.GetValueType(), lldb.eValueTypeVariableGlobal)
-                self.assertEqual(val.GetNumChildren(), 2)
-                child1 = val.GetChildAtIndex(1)
-                self.DebugSBValue(child1)
-                child1_x = child1.GetChildAtIndex(0)
-                self.DebugSBValue(child1_x)
-                self.assertEqual(child1_x.GetTypeName(), "int")
-                self.assertEqual(child1_x.GetValue(), "11")
+                value_check_AA.check_value(self, val, "Got AA::g_points right")
 
         # SBFrame.FindValue() should also work.
         val = frame.FindValue("A::g_points", lldb.eValueTypeVariableGlobal)
         self.DebugSBValue(val)
-        self.assertEqual(val.GetName(), "A::g_points")
+        value_check_A.check_value(self, val, "FindValue also works")
 
         # Also exercise the "parameter" and "local" scopes while we are at it.
         val = frame.FindValue("argc", lldb.eValueTypeVariableArgument)
@@ -176,3 +193,37 @@ class StaticVariableTestCase(TestBase):
         val = frame.FindValue("hello_world", lldb.eValueTypeVariableLocal)
         self.DebugSBValue(val)
         self.assertEqual(val.GetName(), "hello_world")
+
+        # We should also be able to get class statics from FindGlobalVariables.
+        # eMatchTypeStartsWith should only find A:: not AA::
+        val_list = target.FindGlobalVariables("A::", 10, lldb.eMatchTypeStartsWith)
+        self.assertEqual(val_list.GetSize(), 1, "Found only one match")
+        val = val_list[0]
+        value_check_A.check_value(self, val, "FindGlobalVariables starts with")
+
+        # Regex should find both
+        val_list = target.FindGlobalVariables("A::", 10, lldb.eMatchTypeRegex)
+        self.assertEqual(val_list.GetSize(), 2, "Found A & AA")
+        found_a = False
+        found_aa = False
+        for val in val_list:
+            name = val.GetName()
+            if name == "A::g_points":
+                value_check_A.check_value(self, val, "AA found by regex")
+                found_a = True
+            elif name == "AA::g_points":
+                value_check_AA.check_value(self, val, "A found by regex")
+                found_aa = True
+        
+        self.assertTrue(found_a, "Regex search found A::g_points")
+        self.assertTrue(found_aa, "Regex search found AA::g_points")
+
+        # Normal search for full name should find one, but it looks like we don't match
+        # on identifier boundaries here yet:
+        val_list = target.FindGlobalVariables("A::g_points", 10, lldb.eMatchTypeNormal)
+        self.assertEqual(val_list.GetSize(), 2, "We aren't matching on name boundaries yet")
+
+        # Normal search for g_points should find 3 - FindGlobalVariables doesn't distinguish
+        # between file statics and globals:
+        val_list = target.FindGlobalVariables("g_points", 10, lldb.eMatchTypeNormal)
+        self.assertEqual(val_list.GetSize(), 3, "Found all three g_points")
index e96443e..40a8802 100644 (file)
@@ -21,23 +21,37 @@ public:
     static PointType g_points[];
 };
 
+// Make sure similar names don't confuse us:
+
+class AA
+{
+public:
+  static PointType g_points[];
+};
+
 PointType A::g_points[] = 
 {
     {    1,    2 },
     {   11,   22 }
 };
-
 static PointType g_points[] = 
 {
     {    3,    4 },
     {   33,   44 }
 };
 
+PointType AA::g_points[] = 
+{
+    {    5,    6 },
+    {   55,   66 }
+};
+
 int
 main (int argc, char const *argv[])
 {
     const char *hello_world = "Hello, world!";
     printf ("A::g_points[1].x = %i\n", A::g_points[1].x); // Set break point at this line.
+    printf ("AA::g_points[1].x = %i\n", AA::g_points[1].x);
     printf ("::g_points[1].x = %i\n", g_points[1].x);
     printf ("%s\n", hello_world);
     return 0;