[mono] Use unsigned char when computing UTF8 string hashes (#83273)
authorAleksey Kliger (λgeek) <alklig@microsoft.com>
Mon, 13 Mar 2023 14:14:21 +0000 (10:14 -0400)
committerGitHub <noreply@github.com>
Mon, 13 Mar 2023 14:14:21 +0000 (10:14 -0400)
* [mono] Use `unsigned char` when computing UTF8 string hashes

The C standard does not specify whether `char` is signed or unsigned, it is implementation defined.

Apparently Android aarch64 makes a different choice than other platforms (at least macOS arm64 and Windows x64 give different results).

Mono uses `mono_metadata_str_hash` in the AOT compiler and AOT runtime to optimize class name lookup.  As a result, classes whose names include UTF-8 continuation bytes (with the high bit = 1) will hash differently in the AOT compiler and on the device.

Fixes https://github.com/dotnet/runtime/issues/82187
Fixes https://github.com/dotnet/runtime/issues/78638

* [aot] add DEBUG_AOT_NAME_TABLE code for debugging the class names

   AOT compiler: Emits a second "class_name_table_debug" symbol that has all the class names and hashes as strings.

   AOT runtime: warns if a class is not found in the name cache

* Add regression test

src/mono/mono/eglib/ghashtable.c
src/mono/mono/metadata/metadata.c
src/mono/mono/mini/aot-compiler.c
src/mono/mono/mini/aot-runtime.c
src/mono/mono/mini/aot-runtime.h
src/tests/Loader/classloader/regressions/GitHub_82187/GitHub_82187.csproj [new file with mode: 0644]
src/tests/Loader/classloader/regressions/GitHub_82187/repro.cs [new file with mode: 0644]

index 8b5c29a..79b2460 100644 (file)
@@ -673,7 +673,7 @@ guint
 g_str_hash (gconstpointer v1)
 {
        guint hash = 0;
-       char *p = (char *) v1;
+       unsigned char *p = (unsigned char *) v1;
 
        while (*p++)
                hash = (hash << 5) - (hash + *p);
index eedf265..c2821d4 100644 (file)
@@ -5524,7 +5524,8 @@ guint
 mono_metadata_str_hash (gconstpointer v1)
 {
        /* Same as g_str_hash () in glib */
-       char *p = (char *) v1;
+       /* note: signed/unsigned char matters - we feed UTF-8 to this function, so the high bit will give diferent results if we don't match. */
+       unsigned char *p = (unsigned char *) v1;
        guint hash = *p;
 
        while (*p++) {
index f4713c8..611a75b 100644 (file)
@@ -11417,8 +11417,27 @@ emit_class_info (MonoAotCompile *acfg)
 typedef struct ClassNameTableEntry {
        guint32 token, index;
        struct ClassNameTableEntry *next;
+#ifdef DEBUG_AOT_NAME_TABLE
+       char *full_name;
+       uint32_t hash;
+#endif
 } ClassNameTableEntry;
 
+static char*
+get_class_full_name_for_hash (MonoClass *klass)
+{
+       return mono_type_get_name_full (m_class_get_byval_arg (klass), MONO_TYPE_NAME_FORMAT_FULL_NAME);
+}
+
+static uint32_t
+hash_for_class (MonoClass *klass)
+{
+       char *full_name = get_class_full_name_for_hash (klass);
+       uint32_t hash = mono_metadata_str_hash (full_name);
+       g_free (full_name);
+       return hash;
+}
+
 static void
 emit_class_name_table (MonoAotCompile *acfg)
 {
@@ -11426,9 +11445,12 @@ emit_class_name_table (MonoAotCompile *acfg)
        guint32 token, hash;
        MonoClass *klass;
        GPtrArray *table;
-       char *full_name;
        guint8 *buf, *p;
        ClassNameTableEntry *entry, *new_entry;
+#ifdef DEBUG_AOT_NAME_TABLE
+       int name_buf_size = 0;
+       guint8 *name_buf, *name_p;
+#endif
 
        /*
         * Construct a chained hash table for mapping class names to typedef tokens.
@@ -11446,13 +11468,17 @@ emit_class_name_table (MonoAotCompile *acfg)
                        mono_error_cleanup (error);
                        continue;
                }
-               full_name = mono_type_get_name_full (m_class_get_byval_arg (klass), MONO_TYPE_NAME_FORMAT_FULL_NAME);
-               hash = mono_metadata_str_hash (full_name) % table_size;
-               g_free (full_name);
+               hash = hash_for_class (klass) % table_size;
 
                /* FIXME: Allocate from the mempool */
                new_entry = g_new0 (ClassNameTableEntry, 1);
                new_entry->token = token;
+#ifdef DEBUG_AOT_NAME_TABLE
+               new_entry->full_name = get_class_full_name_for_hash (klass);
+               new_entry->hash = hash;
+               /* '%s'=%08x\n */
+               name_buf_size += strlen (new_entry->full_name) + strlen("''=\n") + 8;
+#endif
 
                entry = (ClassNameTableEntry *)g_ptr_array_index (table, hash);
                if (entry == NULL) {
@@ -11471,6 +11497,10 @@ emit_class_name_table (MonoAotCompile *acfg)
        /* Emit the table */
        buf_size = table->len * 4 + 4;
        p = buf = (guint8 *)g_malloc0 (buf_size);
+#ifdef DEBUG_AOT_NAME_TABLE
+       name_buf_size ++;  /* one extra trailing nul */
+       name_p = name_buf = (guint8 *)g_malloc0 (name_buf_size);
+#endif
 
        /* FIXME: Optimize memory usage */
        g_assert (table_size < 65000);
@@ -11488,14 +11518,28 @@ emit_class_name_table (MonoAotCompile *acfg)
                        else
                                encode_int16 (0, p, &p);
                }
+#ifdef DEBUG_AOT_NAME_TABLE
+               if (entry != NULL) {
+                       name_p += sprintf ((char*)name_p, "'%s'=%08x\n", entry->full_name, entry->hash);
+                       g_free (entry->full_name);
+               }
+#endif
                g_free (entry);
        }
+#ifdef DEBUG_AOT_NAME_TABLE
+       g_assert (name_p - name_buf <= name_buf_size);
+#endif
        g_assert (p - buf <= buf_size);
        g_ptr_array_free (table, TRUE);
 
        emit_aot_data (acfg, MONO_AOT_TABLE_CLASS_NAME, "class_name_table", buf, GPTRDIFF_TO_INT (p - buf));
 
        g_free (buf);
+
+#ifdef DEBUG_AOT_NAME_TABLE
+       emit_aot_data (acfg, MONO_AOT_TABLE_CLASS_NAME_DEBUG, "class_name_table_debug", name_buf, GPTRDIFF_TO_INT (name_p - name_buf));
+       g_free (name_buf);
+#endif
 }
 
 static void
index f7ebef5..d60b926 100644 (file)
@@ -2601,6 +2601,10 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
        MonoTableInfo  *t;
        guint32 cols [MONO_TYPEDEF_SIZE];
        GHashTable *nspace_table;
+#ifdef DEBUG_AOT_NAME_TABLE
+       char *debug_full_name;
+       uint32_t debug_hash;
+#endif
 
        if (!amodule || !amodule->class_name_table)
                return FALSE;
@@ -2634,6 +2638,10 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
                        full_name = g_strdup_printf ("%s.%s", name_space, name);
                }
        }
+#ifdef DEBUG_AOT_NAME_TABLE
+       debug_full_name = g_strdup (full_name);
+       debug_hash = mono_metadata_str_hash (full_name) % table_size;
+#endif
        hash = mono_metadata_str_hash (full_name) % table_size;
        if (full_name != full_name_buf)
                g_free (full_name);
@@ -2673,6 +2681,9 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
                                        g_hash_table_insert (nspace_table, (char*)name2, *klass);
                                        amodule_unlock (amodule);
                                }
+#ifdef DEBUG_AOT_NAME_TABLE
+                               g_free (debug_full_name);
+#endif
                                return TRUE;
                        }
 
@@ -2686,6 +2697,13 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
 
        amodule_unlock (amodule);
 
+#ifdef DEBUG_AOT_NAME_TABLE
+       if (*klass == NULL) {
+               g_warning ("AOT class name cache '%s'=%08x not found\n", debug_full_name, debug_hash);
+       }
+       g_free (debug_full_name);
+#endif
+
        return TRUE;
 }
 
index 77f227f..bd66659 100644 (file)
@@ -86,6 +86,8 @@ typedef enum {
        MONO_AOT_METHOD_FLAG_INTERP_ENTRY_ONLY = 16,
 } MonoAotMethodFlags;
 
+#undef DEBUG_AOT_NAME_TABLE
+
 typedef enum {
        MONO_AOT_TABLE_BLOB,
        MONO_AOT_TABLE_CLASS_NAME,
@@ -99,6 +101,9 @@ typedef enum {
        MONO_AOT_TABLE_IMAGE_TABLE,
        MONO_AOT_TABLE_WEAK_FIELD_INDEXES,
        MONO_AOT_TABLE_METHOD_FLAGS_TABLE,
+#ifdef DEBUG_AOT_NAME_TABLE
+       MONO_AOT_TABLE_CLASS_NAME_DEBUG,
+#endif
        MONO_AOT_TABLE_NUM
 } MonoAotFileTable;
 
diff --git a/src/tests/Loader/classloader/regressions/GitHub_82187/GitHub_82187.csproj b/src/tests/Loader/classloader/regressions/GitHub_82187/GitHub_82187.csproj
new file mode 100644 (file)
index 0000000..32355f2
--- /dev/null
@@ -0,0 +1,9 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <OutputType>Exe</OutputType>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="repro.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/Loader/classloader/regressions/GitHub_82187/repro.cs b/src/tests/Loader/classloader/regressions/GitHub_82187/repro.cs
new file mode 100644 (file)
index 0000000..1ab3e54
--- /dev/null
@@ -0,0 +1,31 @@
+using System;
+
+/* Regression test for https://github.com/dotnet/runtime/issues/78638
+ * and https://github.com/dotnet/runtime/issues/82187 ensure AOT
+ * cross-compiler and AOT runtime use the same name hashing for names
+ * that include UTF-8 continuation bytes.
+ */
+
+[MySpecial(typeof(MeineTüre))]
+public class Program
+{
+    public static int Main()
+    {
+        var attr = (MySpecialAttribute)Attribute.GetCustomAttribute(typeof (Program), typeof(MySpecialAttribute), false);
+        if (attr == null)
+            return 101;
+        if (attr.Type == null)
+            return 102;
+        if (attr.Type.FullName != "MeineTüre")
+            return 103;
+        return 100;
+    }
+}
+
+public class MySpecialAttribute : Attribute
+{
+    public Type Type {get; private set; }
+    public MySpecialAttribute(Type t) { Type = t; }
+}
+
+public class MeineTüre {}