Improve superpmi collection steps (#652)
authorBruce Forstall <brucefo@microsoft.com>
Mon, 9 Dec 2019 23:45:42 +0000 (15:45 -0800)
committerGitHub <noreply@github.com>
Mon, 9 Dec 2019 23:45:42 +0000 (15:45 -0800)
Rearrange the "clean" and "remove dups" phases of the collection
steps. This saves a lot of time by avoiding JIT compiling
all the functions that we will just throw out because the MCs
are considered duplicates.

Fixes #646

src/coreclr/scripts/coreclr_arguments.py
src/coreclr/scripts/superpmi.py
src/coreclr/tests/src/JIT/superpmi/superpmicollect.cs

index 16869c1..2629238 100644 (file)
@@ -266,7 +266,7 @@ class CoreclrArguments:
         self.verify(args,
                     "core_root",
                     check_and_return_default_core_root,
-                    "Error, incorrect core_root location.")
+                    "Error, Core_Root could not be determined, or points to a location that doesn't exist.")
 
         self.verify(args,
                     "product_location",
index 23a1c6e..74b42dc 100755 (executable)
@@ -96,8 +96,6 @@ collect_parser.add_argument("--merge_mch_files", dest="merge_mch_files", default
 
 collect_parser.add_argument("--use_zapdisable", dest="use_zapdisable", default=False, action="store_true", help="Allow redundant calls to the systems libraries for more coverage.")
 
-collect_parser.add_argument("--assume_unclean_mch", dest="assume_unclean_mch", default=False, action="store_true", help="Force clean the mch file. This is useful if the dataset is large and there are expected dups.")
-
 # Allow for continuing a collection in progress
 collect_parser.add_argument("-existing_temp_dir", dest="existing_temp_dir", default=None, nargs="?")
 collect_parser.add_argument("--has_run_collection_command", dest="has_run_collection_command", default=False, action="store_true")
@@ -185,6 +183,48 @@ list_parser.add_argument("-build_type", dest="build_type", nargs='?', default="C
 list_parser.add_argument("-runtime_repo_root", dest="runtime_repo_root", default=os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))), help="Path of the dotnet/runtime repo root directory. Optional.")
 
 ################################################################################
+# Helper functions
+################################################################################
+
+def is_zero_length_file(fpath):
+    """ Determine if a file system path refers to an existing file that is zero length
+
+    Args:
+        fpath (str) : file system path to test
+
+    Returns:
+        bool : true if the path is an existing file that is zero length
+    """
+    return os.path.isfile(fpath) and os.stat(fpath).st_size == 0
+
+def is_nonzero_length_file(fpath):
+    """ Determine if a file system path refers to an existing file that is non-zero length
+
+    Args:
+        fpath (str) : file system path to test
+
+    Returns:
+        bool : true if the path is an existing file that is non-zero length
+    """
+    return os.path.isfile(fpath) and os.stat(fpath).st_size != 0
+
+def make_safe_filename(s):
+    """ Turn a string into a string usable as a single file name component; replace illegal characters with underscores.
+
+    Args:
+        s (str) : string to convert to a file name
+
+    Returns:
+        (str) : The converted string
+    """
+    def safe_char(c):
+        if c.isalnum():
+            return c
+        else:
+            return "_"
+    return "".join(safe_char(c) for c in s)
+
+################################################################################
 # Helper classes
 ################################################################################
 
@@ -372,8 +412,8 @@ class SuperPMICollect:
         # The base .MCH file path
         self.base_mch_file = None
 
-        # Clean .MCH file path
-        self.clean_mch_file = None
+        # No dup .MCH file path
+        self.nodup_mch_file = None
 
         # Final .MCH file path
         self.final_mch_file = None
@@ -386,15 +426,15 @@ class SuperPMICollect:
         # Do a basic SuperPMI collect and validation:
         #   1. Collect MC files by running a set of sample apps.
         #   2. Merge the MC files into a single MCH using "mcs -merge *.mc -recursive".
-        #   3. Create a clean MCH by running SuperPMI over the MCH, and using "mcs -strip" to filter
-        #       out any failures (if any).
-        #    4. Create a thin unique MCH by using "mcs -removeDup -thin".
-        #    5. Create a TOC using "mcs -toc".
-        #    6. Verify the resulting MCH file is error-free when running SuperPMI against it with the
-        #       same JIT used for collection.
+        #   3. Create a thin unique MCH by using "mcs -removeDup -thin".
+        #   4. Create a clean MCH by running SuperPMI over the MCH, and using "mcs -strip" to filter
+        #      out any failures (if any).
+        #   5. Create a TOC using "mcs -toc".
+        #   6. Verify the resulting MCH file is error-free when running SuperPMI against it with the
+        #      same JIT used for collection.
         #
-        #    MCH files are big. If we don't need them anymore, clean them up right away to avoid
-        #    running out of disk space in disk constrained situations.
+        #   MCH files are big. If we don't need them anymore, clean them up right away to avoid
+        #   running out of disk space in disk constrained situations.
 
         passed = False
 
@@ -405,7 +445,7 @@ class SuperPMICollect:
                 self.final_fail_mcl_file = os.path.join(temp_location, "finalfail.mcl")
                 
                 self.base_mch_file = os.path.join(temp_location, "base.mch")
-                self.clean_mch_file = os.path.join(temp_location, "clean.mch")
+                self.nodup_mch_file = os.path.join(temp_location, "nodup.mch")
 
                 self.temp_location = temp_location
 
@@ -438,8 +478,8 @@ class SuperPMICollect:
                         self.__merge_mch_files__()
 
                 if not self.coreclr_args.has_verified_clean_mch:
-                    self.__create_clean_mch_file__()
                     self.__create_thin_unique_mch__()
+                    self.__create_clean_mch_file__()
                     self.__create_toc__()
                     self.__verify_final_mch__()
 
@@ -517,17 +557,6 @@ class SuperPMICollect:
                     
                     return assemblies
 
-                def make_safe_filename(s):
-                    def safe_char(c):
-                        if c.isalnum():
-                            return c
-                        else:
-                            return "_"
-                    return "".join(safe_char(c) for c in s)
-
-                def is_zero_length_file(fpath):  
-                    return os.path.isfile(fpath) and os.stat(fpath).st_size == 0
-
                 async def run_pmi(print_prefix, assembly, self):
                     """ Run pmi over all dlls
                     """
@@ -575,14 +604,10 @@ class SuperPMICollect:
 
                 os.environ.update(old_env)
 
-        contents = os.listdir(self.temp_location)
-        mc_contents = [os.path.join(self.temp_location, item) for item in contents if ".mc" in item]
-
-        if len(mc_contents) == 0:
+        mc_files = [os.path.join(self.temp_location, item) for item in os.listdir(self.temp_location) if item.endswith(".mc")]
+        if len(mc_files) == 0:
             raise RuntimeError("No .mc files generated.")
 
-        self.mc_contents = mc_contents
-
     def __merge_mc_files__(self):
         """ Merge the mc files that were generated
 
@@ -596,19 +621,16 @@ class SuperPMICollect:
         command = [self.mcs_path, "-merge", self.base_mch_file, pattern, "-recursive"]
         print("Invoking: " + " ".join(command))
         proc = subprocess.Popen(command)
-
         proc.communicate()
 
-        if not os.path.isfile(self.mcs_path):
-            raise RuntimeError("mch file failed to be generated at: %s" % self.mcs_path)
-
-        contents = os.listdir(self.temp_location)
-        mc_contents = [os.path.join(self.temp_location, item) for item in contents if ".mc" in item and not ".mch" in item]
+        if not os.path.isfile(self.base_mch_file):
+            raise RuntimeError("MCH file failed to be generated at: %s" % self.base_mch_file)
 
         # All the individual MC files are no longer necessary, now that we have
         # merged them into the base.mch. Delete them.
         if not self.coreclr_args.skip_cleanup:
-            for item in mc_contents:
+            mc_files = [os.path.join(self.temp_location, item) for item in os.listdir(self.temp_location) if item.endswith(".mc")]
+            for item in mc_files:
                 os.remove(item)
 
     def __merge_mch_files__(self):
@@ -625,67 +647,67 @@ class SuperPMICollect:
             command = [self.mcs_path, "-concat", self.base_mch_file, item]
             print("Invoking: " + " ".join(command))
             proc = subprocess.Popen(command)
-
             proc.communicate()
 
-        if not os.path.isfile(self.mcs_path):
-            raise RuntimeError("mch file failed to be generated at: %s" % self.mcs_path)
+        if not os.path.isfile(self.base_mch_file):
+            raise RuntimeError("MCH file failed to be generated at: %s" % self.base_mch_file)
+
+    def __create_thin_unique_mch__(self):
+        """  Create a thin unique MCH
+        
+        Notes:
+            <mcl> -removeDup -thin <s_baseMchFile> <s_nodupMchFile>
+        """
+
+        command = [self.mcs_path, "-removeDup", "-thin", self.base_mch_file, self.nodup_mch_file]
+        print("Invoking: " + " ".join(command))
+        proc = subprocess.Popen(command)
+        proc.communicate()
+
+        if not os.path.isfile(self.nodup_mch_file):
+            raise RuntimeError("Error, no dup mch file not created correctly at: %s" % self.nodup_mch_file)
+
+        if not self.coreclr_args.skip_cleanup:
+            os.remove(self.base_mch_file)
+            self.base_mch_file = None
     
     def __create_clean_mch_file__(self):
-        """ Create a clean mch file based on the original
+        """ Create a clean mch file
 
         Notes:
-            <SuperPMIPath> -p -f <s_baseFailMclFile> <s_baseMchFile> <jitPath>
+            <SuperPMIPath> -p -f <s_baseFailMclFile> <s_nodupMchFile> <jitPath>
 
             if <s_baseFailMclFile> is non-empty:
-                <mcl> -strip <s_baseFailMclFile> <s_baseMchFile> <s_cleanMchFile>
+                <mcl> -strip <s_baseFailMclFile> <s_nodupMchFile> <s_finalMchFile>
             else
-                # no need to copy, just change the names
-                clean_mch_file = base_mch_file
+                # copy/move nodup file to final file
             del <s_baseFailMclFile>
         """
 
-        command = [self.superpmi_path, "-p", "-f", self.base_fail_mcl_file, self.base_mch_file, self.jit_path]
-        print (" ".join(command))
+        command = [self.superpmi_path, "-p", "-f", self.base_fail_mcl_file, self.nodup_mch_file, self.jit_path]
+        print("Invoking: " + " ".join(command))
         proc = subprocess.Popen(command)
-
         proc.communicate()
 
-        if os.path.isfile(self.base_fail_mcl_file) and os.stat(self.base_fail_mcl_file).st_size != 0:
-            command = [self.mcs_path, "-strip", self.base_fail_mcl_file, self.base_mch_file, self.clean_mch_file]
-            print (" ".join(command))
+        if is_nonzero_length_file(self.base_fail_mcl_file):
+            command = [self.mcs_path, "-strip", self.base_fail_mcl_file, self.nodup_mch_file, self.final_mch_file]
+            print("Invoking: " + " ".join(command))
             proc = subprocess.Popen(command)
-
             proc.communicate()
         else:
-            self.clean_mch_file = self.base_mch_file
-            self.base_mch_file = None
+            # Ideally we could just rename this file instead of copying it.
+            shutil.copy2(self.nodup_mch_file, self.final_mch_file)
 
-        if not os.path.isfile(self.clean_mch_file):
-            raise RuntimeError("Clean mch file failed to be generated.")
+        if not os.path.isfile(self.final_mch_file):
+            raise RuntimeError("Final mch file failed to be generated.")
 
         if not self.coreclr_args.skip_cleanup:
             if os.path.isfile(self.base_fail_mcl_file):
                 os.remove(self.base_fail_mcl_file)
                 self.base_fail_mcl_file = None
-
-    def __create_thin_unique_mch__(self):
-        """  Create a thin unique MCH
-        
-        Notes:
-            <mcl> -removeDup -thin <s_cleanMchFile> <s_finalMchFile>
-        """
-
-        command = [self.mcs_path, "-removeDup", "-thin", self.clean_mch_file, self.final_mch_file]
-        proc = subprocess.Popen(command)
-        proc.communicate()
-
-        if not os.path.isfile(self.final_mch_file):
-            raise RuntimeError("Error, final mch file not created correctly.")
-
-        if not self.coreclr_args.skip_cleanup:
-            os.remove(self.clean_mch_file)
-            self.clean_mch_file = None
+            if os.path.isfile(self.nodup_mch_file):
+                os.remove(self.nodup_mch_file)
+                self.nodup_mch_file = None
         
     def __create_toc__(self):
         """ Create a TOC file
@@ -695,11 +717,12 @@ class SuperPMICollect:
         """
 
         command = [self.mcs_path, "-toc", self.final_mch_file]
+        print("Invoking: " + " ".join(command))
         proc = subprocess.Popen(command)
         proc.communicate()
 
         if not os.path.isfile(self.toc_file):
-            raise RuntimeError("Error, toc file not created correctly.")
+            raise RuntimeError("Error, toc file not created correctly at: %s" % self.toc_file)
 
     def __verify_final_mch__(self):
         """ Verify the resulting MCH file is error-free when running SuperPMI against it with the same JIT used for collection.
@@ -712,7 +735,7 @@ class SuperPMICollect:
         passed = spmi_replay.replay()
 
         if not passed:
-            raise RuntimeError("Error unclean replay.")
+            raise RuntimeError("Error, unclean replay.")
 
 ################################################################################
 # SuperPMI Replay
@@ -821,7 +844,7 @@ class SuperPMIReplay:
                 print("Clean SuperPMI Replay")
                 return_code = True
 
-            if os.path.isfile(self.fail_mcl_file) and os.stat(self.fail_mcl_file).st_size != 0:
+            if is_nonzero_length_file(self.fail_mcl_file):  
                 # Unclean replay.
                 #
                 # Save the contents of the fail.mcl file to dig into failures.
@@ -1017,7 +1040,7 @@ class SuperPMIReplayAsmDiffs:
             else:
                 return_code = 1;
 
-            if os.path.isfile(self.fail_mcl_file) and os.stat(self.fail_mcl_file).st_size != 0:
+            if is_nonzero_length_file(self.fail_mcl_file):
                 # Unclean replay.
                 #
                 # Save the contents of the fail.mcl file to dig into failures.
@@ -1080,7 +1103,7 @@ class SuperPMIReplayAsmDiffs:
             # There were diffs. Go through each method that created diffs and
             # create a base/diff asm file with diffable asm. In addition, create
             # a standalone .mc for easy iteration.
-            if os.path.isfile(self.diff_mcl_file) and os.stat(self.diff_mcl_file).st_size != 0 or self.coreclr_args.diff_with_code_only:
+            if is_nonzero_length_file(self.diff_mcl_file) or self.coreclr_args.diff_with_code_only:
                 # AsmDiffs.
                 #
                 # Save the contents of the fail.mcl file to dig into failures.
@@ -1867,11 +1890,6 @@ def setup_args(args):
                             "Unable to set existing_temp_dir.")
 
         coreclr_args.verify(args,
-                            "assume_unclean_mch",
-                            lambda unused: True,
-                            "Unable to set assume_unclean_mch.")
-
-        coreclr_args.verify(args,
                             "has_run_collection_command",
                             lambda unused: True,
                             "Unable to set has_run_collection_command.")
index 8d65865..fddd4d9 100644 (file)
@@ -103,10 +103,10 @@ namespace SuperPMICollection
     internal class SuperPMICollectionClass
     {
         private static string s_tempDir = null;             // Temporary directory where we will put the MC files, MCH files, MCL files, and TOC.
-        private static string s_baseFailMclFile = null;     // Pathname for a temporary .MCL file used for noticing superpmi replay failures against base MCH.
+        private static string s_baseFailMclFile = null;     // Pathname for a temporary .MCL file used for noticing superpmi replay failures against preliminary MCH.
         private static string s_finalFailMclFile = null;    // Pathname for a temporary .MCL file used for noticing superpmi replay failures against final MCH.
         private static string s_baseMchFile = null;         // The base .MCH file path
-        private static string s_cleanMchFile = null;        // The clean .MCH file path
+        private static string s_nodupMchFile = null;        // The nodup .MCH file path
         private static string s_finalMchFile = null;        // The clean thin unique .MCH file path
         private static string s_tocFile = null;             // The .TOC file path for the clean thin unique .MCH file
         private static string s_errors = "";                // Collect non-fatal file delete errors to display at the end of the collection process.
@@ -119,7 +119,7 @@ namespace SuperPMICollection
             {
                 File.Delete(filePath);
             }
-            catch(Exception ex)
+            catch (Exception ex)
             {
                 string err = string.Format("Error deleting file \"{0}\": {1}", filePath, ex.Message);
                 s_errors += err + System.Environment.NewLine;
@@ -146,7 +146,7 @@ namespace SuperPMICollection
             s_baseFailMclFile  = Path.Combine(s_tempDir, "basefail.mcl");
             s_finalFailMclFile = Path.Combine(s_tempDir, "finalfail.mcl");
             s_baseMchFile      = Path.Combine(s_tempDir, "base.mch");
-            s_cleanMchFile     = Path.Combine(s_tempDir, "clean.mch");
+            s_nodupMchFile     = Path.Combine(s_tempDir, "nodup.mch");
 
             if (outputMchPath == null)
             {
@@ -362,59 +362,58 @@ namespace SuperPMICollection
             }
         }
 
+        // Create a thin unique MCH:
+        //      <mcl> -removeDup -thin <s_baseMchFile> <s_nodupMchFile>
+        private static void CreateThinUniqueMCH()
+        {
+            RunProgram(Global.McsPath, "-removeDup -thin " + s_baseMchFile + " " + s_nodupMchFile);
+
+            if (!File.Exists(s_nodupMchFile))
+            {
+                throw new SpmiException("file missing: " + s_nodupMchFile);
+            }
+
+            if (!Global.SkipCleanup)
+            {
+                // The base file is no longer used; delete it.
+                if (File.Exists(s_baseMchFile))
+                {
+                    SafeFileDelete(s_baseMchFile);
+                    s_baseMchFile = null;
+                }
+            }
+        }
+
         // Create clean MCH file:
-        //      <superPmiPath> -p -f <s_baseFailMclFile> <s_baseMchFile> <jitPath>
+        //      <superPmiPath> -p -f <s_baseFailMclFile> <s_nodupMchFile> <jitPath>
         //      if <s_baseFailMclFile> is non-empty:
-        //           <mcl> -strip <s_baseFailMclFile> <s_baseMchFile> <s_cleanMchFile>
+        //           <mcl> -strip <s_baseFailMclFile> <s_nodupMchFile> <s_finalMchFile>
         //      else:
-        //           s_cleanMchFile = s_baseMchFile // no need to copy; just change string names (and null out s_baseMchFile so we don't try to delete twice)
+        //           move s_nodupMchFile to s_finalMchFile
         //      del <s_baseFailMclFile>
         private static void CreateCleanMCHFile()
         {
-            RunProgram(Global.SuperPmiPath, "-p -f " + s_baseFailMclFile + " " + s_baseMchFile + " " + Global.JitPath);
+            RunProgram(Global.SuperPmiPath, "-p -f " + s_baseFailMclFile + " " + s_nodupMchFile + " " + Global.JitPath);
 
             if (File.Exists(s_baseFailMclFile) && !String.IsNullOrEmpty(File.ReadAllText(s_baseFailMclFile)))
             {
-                RunProgram(Global.McsPath, "-strip " + s_baseMchFile + " " + s_cleanMchFile);
+                RunProgram(Global.McsPath, "-strip " + s_nodupMchFile + " " + s_finalMchFile);
             }
             else
             {
-                // Instead of stripping the file, just set s_cleanMchFile = s_baseMchFile and
-                // null out s_baseMchFile so we don't try to delete the same file twice.
-                // Note that we never use s_baseMchFile after this function is called.
-
-                s_cleanMchFile = s_baseMchFile;
-                s_baseMchFile = null;
-            }
-
-            if (!File.Exists(s_cleanMchFile))
-            {
-                throw new SpmiException("file missing: " + s_cleanMchFile);
-            }
-
-            if (!Global.SkipCleanup)
-            {
-                if (File.Exists(s_baseFailMclFile))
+                try
                 {
-                    SafeFileDelete(s_baseFailMclFile);
-                    s_baseFailMclFile = null;
+                    Console.WriteLine("Moving {0} to {1}", s_nodupMchFile, s_finalMchFile);
+                    File.Move(s_nodupMchFile, s_finalMchFile, overwrite:true);
+                    s_nodupMchFile = null; // This file no longer exists.
                 }
-
-                // The base file is no longer used (unless there was no cleaning done, in which case
-                // s_baseMchFile has been null-ed and s_cleanMchFile points at the base file).
-                if ((s_baseMchFile != null) && File.Exists(s_baseMchFile))
+                catch (Exception ex)
                 {
-                    SafeFileDelete(s_baseMchFile);
-                    s_baseMchFile = null;
+                    string err = string.Format("Error moving file \"{0}\" to \"{1}\": {2}", s_nodupMchFile, s_finalMchFile, ex.Message);
+                    s_errors += err + System.Environment.NewLine;
+                    Console.Error.WriteLine(err);
                 }
             }
-        }
-
-        // Create a thin unique MCH:
-        //      <mcl> -removeDup -thin <s_cleanMchFile> <s_finalMchFile>
-        private static void CreateThinUniqueMCH()
-        {
-            RunProgram(Global.McsPath, "-removeDup -thin " + s_cleanMchFile + " " + s_finalMchFile);
 
             if (!File.Exists(s_finalMchFile))
             {
@@ -423,11 +422,17 @@ namespace SuperPMICollection
 
             if (!Global.SkipCleanup)
             {
-                // The clean file is no longer used; delete it.
-                if ((s_cleanMchFile != null) && File.Exists(s_cleanMchFile))
+                if (File.Exists(s_baseFailMclFile))
                 {
-                    SafeFileDelete(s_cleanMchFile);
-                    s_cleanMchFile = null;
+                    SafeFileDelete(s_baseFailMclFile);
+                    s_baseFailMclFile = null;
+                }
+
+                // The nodup file is no longer used.
+                if ((s_nodupMchFile != null) && File.Exists(s_nodupMchFile))
+                {
+                    SafeFileDelete(s_nodupMchFile);
+                    s_nodupMchFile = null;
                 }
             }
         }
@@ -471,7 +476,7 @@ namespace SuperPMICollection
         // Cleanup. If we get here due to a failure of some kind, we want to do full cleanup. If we get here as part
         // of normal shutdown processing, we want to keep the s_finalMchFile and s_tocFile if s_saveFinalMchFile == true.
         //      del <s_baseMchFile>
-        //      del <s_cleanMchFile>
+        //      del <s_nodupMchFile>
         //      del <s_finalMchFile>
         //      del <s_tocFile>
         //      rmdir <s_tempDir>
@@ -493,10 +498,10 @@ namespace SuperPMICollection
                     SafeFileDelete(s_baseMchFile);
                     s_baseMchFile = null;
                 }
-                if ((s_cleanMchFile != null) && File.Exists(s_cleanMchFile))
+                if ((s_nodupMchFile != null) && File.Exists(s_nodupMchFile))
                 {
-                    SafeFileDelete(s_cleanMchFile);
-                    s_cleanMchFile = null;
+                    SafeFileDelete(s_nodupMchFile);
+                    s_nodupMchFile = null;
                 }
 
                 if (!s_saveFinalMchFile)
@@ -538,9 +543,9 @@ namespace SuperPMICollection
             // Do a basic SuperPMI collect and validation:
             // 1. Collect MC files by running a set of sample apps.
             // 2. Merge the MC files into a single MCH using "mcs -merge *.mc -recursive".
-            // 3. Create a clean MCH by running superpmi over the MCH, and using "mcs -strip" to filter
+            // 3. Create a thin unique MCH by using "mcs -removeDup -thin".
+            // 4. Create a clean MCH by running superpmi over the MCH, and using "mcs -strip" to filter
             //    out any failures (if any).
-            // 4. Create a thin unique MCH by using "mcs -removeDup -thin".
             // 5. Create a TOC using "mcs -toc".
             // 6. Verify the resulting MCH file is error-free when running superpmi against it with the
             //    same JIT used for collection.
@@ -559,8 +564,8 @@ namespace SuperPMICollection
                 ChooseFilePaths(outputMchPath);
                 CollectMCFiles(runProgramPath, runProgramArguments);
                 MergeMCFiles();
-                CreateCleanMCHFile();
                 CreateThinUniqueMCH();
+                CreateCleanMCHFile();
                 CreateTOC();
                 VerifyFinalMCH();