[metadata] Break up mono_metadata_parse_type_internal (#60725)

author Aleksey Kliger (λgeek) <alklig@microsoft.com>

Tue, 26 Oct 2021 02:39:13 +0000 (22:39 -0400)

committer GitHub <noreply@github.com>

Tue, 26 Oct 2021 02:39:13 +0000 (22:39 -0400)
author Aleksey Kliger (λgeek) <alklig@microsoft.com>
Tue, 26 Oct 2021 02:39:13 +0000 (22:39 -0400)
committer GitHub <noreply@github.com>
Tue, 26 Oct 2021 02:39:13 +0000 (22:39 -0400)
diff --git a/src/mono/mono/metadata/metadata.c b/src/mono/mono/metadata/metadata.c

index 80fc7f0..698a9a2 100644 (file)
--- a/src/mono/mono/metadata/metadata.c
+++ b/src/mono/mono/metadata/metadata.c
@@ -1970,6 +1970,188 @@ mono_metadata_init (void)
         mono_metadata_update_init ();
  }
  
+/*
+ * Make a pass over the metadata signature blob starting at \p tmp_ptr and count the custom modifiers. 
+ */
+static int
+count_custom_modifiers (MonoImage *m, const char *tmp_ptr)
+{
+       int count = 0;
+       gboolean found = TRUE;
+       while (found) {
+               switch (*tmp_ptr) {
+               case MONO_TYPE_PINNED:
+               case MONO_TYPE_BYREF:
+                       ++tmp_ptr;
+                       break;
+               case MONO_TYPE_CMOD_REQD:
+               case MONO_TYPE_CMOD_OPT:
+                       count ++;
+                       mono_metadata_parse_custom_mod (m, NULL, tmp_ptr, &tmp_ptr);
+                       break;
+               default:
+                       found = FALSE;
+               }
+       }
+       return count;
+}
+
+/*
+ * Decode the (expected \p count, possibly 0) custom modifiers as well as the "byref" and "pinned"
+ * markers from the metadata stream \p ptr and put them into \p cmods
+ *
+ * Sets \p rptr past the end of the parsed metadata.  Sets \p pinned and \p byref if those modifiers
+ * were present.
+ */
+static void
+decode_custom_modifiers (MonoImage *m, MonoCustomModContainer *cmods, int count, const char *ptr, const char **rptr, gboolean *pinned, gboolean *byref)
+{
+       gboolean found = TRUE;
+       /* cmods are encoded in reverse order from how we normally see them.
+        * "int32 modopt (Foo) modopt (Bar)" is encoded as "cmod_opt [typedef_or_ref "Bar"] cmod_opt [typedef_or_ref "Foo"] I4"
+        */
+       while (found) {
+               switch (*ptr) {
+               case MONO_TYPE_PINNED:
+                       *pinned = TRUE;
+                       ++ptr;
+                       break;
+               case MONO_TYPE_BYREF:
+                       *byref = TRUE;
+                       ++ptr;
+                       break;
+               case MONO_TYPE_CMOD_REQD:
+               case MONO_TYPE_CMOD_OPT:
+                       g_assert (count > 0);
+                       mono_metadata_parse_custom_mod (m, &(cmods->modifiers [--count]), ptr, &ptr);
+                       break;
+               default:
+                       found = FALSE;
+               }
+       }
+
+       // either there were no cmods, or else we iterated through all of cmods backwards to populate it.
+       g_assert (count == 0);
+       *rptr = ptr;
+}
+
+/*
+ * Allocate the memory necessary to hold a \c MonoType with \p count custom modifiers.
+ * If \p transient is true, allocate from the heap, otherwise allocate from the mempool of image \p m
+ */
+static MonoType *
+alloc_type_with_cmods (MonoImage *m, gboolean transient, int count)
+{
+       g_assert (count > 0);
+       MonoType *type;
+       size_t size = mono_sizeof_type_with_mods (count, FALSE);
+       type = transient ? (MonoType *)g_malloc0 (size) : (MonoType *)mono_image_alloc0 (m, size);
+       type->has_cmods = TRUE;
+
+       MonoCustomModContainer *cmods = mono_type_get_cmods (type);
+       cmods->count = count;
+       cmods->image = m;
+
+       return type;
+}
+
+/*
+ * If \p transient is true, free \p type, otherwise no-op
+ */
+static void
+free_parsed_type (MonoType *type, gboolean transient)
+{
+       if (transient)
+               mono_metadata_free_type (type);
+}
+
+/*
+ * Try to find a pre-allocated version of the given \p type.
+ * Returns true and sets \p canonical_type if found, otherwise return false.
+ *
+ * For classes and valuetypes, this returns their embedded byval_arg or
+ * this_arg types.  For base types, it returns the global versions.
+ */
+static gboolean
+try_get_canonical_type (MonoType *type, MonoType **canonical_type)
+{
+       /* Note: If the type has any attribtues or modifiers the function currently returns false,
+        * although there's no fundamental reason we can't have cached copies in those instances (or
+        * indeed cached arrays, pointers or some generic instances).  However in that case there's
+        * limited utility in returning a cached copy because the parsing code in
+        * do_mono_metadata_parse_type could have allocated some mempool or heap memory already.
+        *
+        * This function should be kept closely in sync with mono_metadata_free_type so that it
+        * doesn't try to free canonical MonoTypes (which might not even be heap allocated).
+        */
+       g_assert (!type->has_cmods);
+       if ((type->type == MONO_TYPE_CLASS || type->type == MONO_TYPE_VALUETYPE) && !type->pinned && !type->attrs) {
+               MonoType *ret = m_type_is_byref (type) ? m_class_get_this_arg (type->data.klass) : m_class_get_byval_arg (type->data.klass);
+
+               /* Consider the case:
+
+                  class Foo<T> { class Bar {} }
+                  class Test : Foo<Test>.Bar {}
+
+                  When Foo<Test> is being expanded, 'Test' isn't yet initialized.  It's actually in
+                  a really pristine state: it doesn't even know whether 'Test' is a reference or a value type.
+
+                  We ensure that the MonoClass is in a state that we can canonicalize to:
+
+                  klass->_byval_arg.data.klass == klass
+                  klass->this_arg.data.klass == klass
+
+                  If we can't canonicalize 'type', it doesn't matter, since later users of 'type' will do it.
+
+                  LOCKING: even though we don't explicitly hold a lock, in the problematic case 'ret' is a field
+                  of a MonoClass which currently holds the loader lock.  'type' is local.
+               */
+               if (ret->data.klass == type->data.klass) {
+                       *canonical_type = ret;
+                       return TRUE;
+               }
+       }
+
+       /* Maybe it's one of the globaly-known basic types */
+       MonoType *cached;
+       /* No need to use locking since nobody is modifying the hash table */
+       if ((cached = (MonoType *)g_hash_table_lookup (type_cache, type))) {
+               *canonical_type = cached;
+               return TRUE;
+       }
+
+       return FALSE;
+}
+
+/*
+ * Fill in \p type (expecting \p cmod_count custom modifiers) by parsing it from the metadata stream pointed at by \p ptr.
+ *
+ * On success returns true and sets \p rptr past the parsed stream data.  On failure return false and sets \p error.
+ */
+static gboolean
+do_mono_metadata_parse_type_with_cmods (MonoType *type, int cmod_count, MonoImage *m, MonoGenericContainer *container,
+                                       short opt_attrs, gboolean transient, const char *ptr, const char **rptr, MonoError *error)
+{
+       gboolean byref= FALSE;
+       gboolean pinned = FALSE;
+
+       error_init (error);
+
+       /* Iterate again, but now parse pinned, byref and custom modifiers */
+       decode_custom_modifiers (m, mono_type_get_cmods (type), cmod_count, ptr, &ptr, &pinned, &byref);
+
+       type->attrs = opt_attrs;
+       type->byref__ = byref;
+       type->pinned = pinned ? 1 : 0;
+
+       if (!do_mono_metadata_parse_type (type, m, container, transient, ptr, &ptr, error))
+               return FALSE;
+
+       if (rptr)
+               *rptr = ptr;
+       return TRUE;
+}
+
  /**
   * mono_metadata_parse_type:
   * \param m metadata context
@@ -1999,17 +2181,23 @@ static MonoType*
  mono_metadata_parse_type_internal (MonoImage *m, MonoGenericContainer *container,
                                                                    short opt_attrs, gboolean transient, const char *ptr, const char **rptr, MonoError *error)
  {
-       MonoType *type, *cached;
+       MonoType *type;
         MonoType stype;
-       gboolean byref = FALSE;
-       gboolean pinned = FALSE;
-       const char *tmp_ptr;
         int count = 0; // Number of mod arguments
-       gboolean found;
+
+       gboolean allocated = FALSE;
  
         error_init (error);
  
         /*
+        * Q: What's going on with `stype` and `allocated`?  A: A very common case is that we're
+        * parsing "int" or "string" or "Dictionary<K,V>" non-transiently.  In that case we don't
+        * want to flood the mempool with millions of copies of MonoType 'int' (etc).  So we parse
+        * it into a stack variable and try_get_canonical_type, below.  As long as the type is
+        * normal, we will avoid having to make an extra copy in the mempool.
+        */
+
+       /*
          * According to the spec, custom modifiers should come before the byref
          * flag, but the IL produced by ilasm from the following signature:
          *   object modopt(...) &
@@ -2022,121 +2210,43 @@ mono_metadata_parse_type_internal (MonoImage *m, MonoGenericContainer *container
          */
  
         /* Count the modifiers first */
-       tmp_ptr = ptr;
-       found = TRUE;
-       while (found) {
-               switch (*tmp_ptr) {
-               case MONO_TYPE_PINNED:
-               case MONO_TYPE_BYREF:
-                       ++tmp_ptr;
-                       break;
-               case MONO_TYPE_CMOD_REQD:
-               case MONO_TYPE_CMOD_OPT:
-                       count ++;
-                       mono_metadata_parse_custom_mod (m, NULL, tmp_ptr, &tmp_ptr);
-                       break;
-               default:
-                       found = FALSE;
-               }
-       }
-
-       MonoCustomModContainer *cmods = NULL;
+       count = count_custom_modifiers (m, ptr);
  
         if (count) { // There are mods, so the MonoType will be of nonstandard size.
+               allocated = TRUE;
                 if (count > 64) {
                         mono_error_set_bad_image (error, m, "Invalid type with more than 64 modifiers");
                         return NULL;
                 }
-
-               size_t size = mono_sizeof_type_with_mods (count, FALSE);
-               type = transient ? (MonoType *)g_malloc0 (size) : (MonoType *)mono_image_alloc0 (m, size);
-               type->has_cmods = TRUE;
-
-               cmods = mono_type_get_cmods (type);
-               cmods->count = count;
-               cmods->image = m;
+               type = alloc_type_with_cmods (m, transient, count);
         } else {     // The type is of standard size, so we can allocate it on the stack.
                 type = &stype;
                 memset (type, 0, MONO_SIZEOF_TYPE);
         }
  
-       /* Iterate again, but now parse pinned, byref and custom modifiers */
-       found = TRUE;
-       /* cmods are encoded in reverse order from how we normally see them.
-        * "int32 modopt (Foo) modopt (Bar)" is encoded as "cmod_opt [typedef_or_ref "Bar"] cmod_opt [typedef_or_ref "Foo"] I4"
-        */
-       while (found) {
-               switch (*ptr) {
-               case MONO_TYPE_PINNED:
-                       pinned = TRUE;
-                       ++ptr;
-                       break;
-               case MONO_TYPE_BYREF:
-                       byref = TRUE;
-                       ++ptr;
-                       break;
-               case MONO_TYPE_CMOD_REQD:
-               case MONO_TYPE_CMOD_OPT:
-                       mono_metadata_parse_custom_mod (m, &(cmods->modifiers [--count]), ptr, &ptr);
-                       break;
-               default:
-                       found = FALSE;
-               }
-       }
-       
-       // either there were no cmods, or else we iterated through all of cmods backwards to populate it.
-       g_assert (count == 0);
-
-       type->attrs = opt_attrs;
-       type->byref__ = byref;
-       type->pinned = pinned ? 1 : 0;
-
-       if (!do_mono_metadata_parse_type (type, m, container, transient, ptr, &ptr, error))
+       if (!do_mono_metadata_parse_type_with_cmods (type, count, m, container, opt_attrs, transient, ptr, rptr, error)) {
+               if (allocated)
+                       free_parsed_type (type, transient);
                 return NULL;
-
-       if (rptr)
-               *rptr = ptr;
+       }
+               
  
         // Possibly we can return an already-allocated type instead of the one we decoded
-       if (!type->has_cmods && !transient) {
+       if (!allocated && !transient) {
                 /* no need to free type here, because it is on the stack */
-               if ((type->type == MONO_TYPE_CLASS || type->type == MONO_TYPE_VALUETYPE) && !type->pinned && !type->attrs) {
-                       MonoType *ret = m_type_is_byref (type) ? m_class_get_this_arg (type->data.klass) : m_class_get_byval_arg (type->data.klass);
-
-                       /* Consider the case:
-
-                            class Foo<T> { class Bar {} }
-                            class Test : Foo<Test>.Bar {}
-
-                          When Foo<Test> is being expanded, 'Test' isn't yet initialized.  It's actually in
-                          a really pristine state: it doesn't even know whether 'Test' is a reference or a value type.
-
-                          We ensure that the MonoClass is in a state that we can canonicalize to:
-
-                            klass->_byval_arg.data.klass == klass
-                            klass->this_arg.data.klass == klass
-
-                          If we can't canonicalize 'type', it doesn't matter, since later users of 'type' will do it.
-
-                          LOCKING: even though we don't explicitly hold a lock, in the problematic case 'ret' is a field
-                                   of a MonoClass which currently holds the loader lock.  'type' is local.
-                       */
-                       if (ret->data.klass == type->data.klass) {
-                               return ret;
-                       }
-               }
-               /* No need to use locking since nobody is modifying the hash table */
-               if ((cached = (MonoType *)g_hash_table_lookup (type_cache, type))) {
-                       return cached;
-               }
+               MonoType *ret_type = NULL;
+               if (try_get_canonical_type (type, &ret_type))
+                       return ret_type;
         }
         
         /* printf ("%x %x %c %s\n", type->attrs, type->num_mods, type->pinned ? 'p' : ' ', mono_type_full_name (type)); */
-       
-       if (type == &stype) { // Type was allocated on the stack, so we need to copy it to safety
+
+       // Otherwise return the type we decoded
+       if (!allocated) { // Type was allocated on the stack, so we need to copy it to safety
                 type = transient ? (MonoType *)g_malloc (MONO_SIZEOF_TYPE) : (MonoType *)mono_image_alloc (m, MONO_SIZEOF_TYPE);
                 memcpy (type, &stype, MONO_SIZEOF_TYPE);
         }
+       g_assert (type != &stype);
         return type;
  }
  
@@ -3977,6 +4087,8 @@ do_mono_metadata_parse_type (MonoType *type, MonoImage *m, MonoGenericContainer
  void
  mono_metadata_free_type (MonoType *type)
  {
+       /* Note: keep in sync with do_mono_metadata_parse_type and try_get_canonical_type which
+        * allocate memory or try to avoid allocating memory. */
         if (type >= builtin_types && type < builtin_types + NBUILTIN_TYPES ())
                 return;
author	Aleksey Kliger (λgeek) <alklig@microsoft.com>
	Tue, 26 Oct 2021 02:39:13 +0000 (22:39 -0400)
committer	GitHub <noreply@github.com>
	Tue, 26 Oct 2021 02:39:13 +0000 (22:39 -0400)