Tweak SuperPMI collections (#66627)
author Bruce Forstall <brucefo@microsoft.com>
Tue, 15 Mar 2022 02:31:13 +0000 (19:31 -0700)
committer GitHub <noreply@github.com>
Tue, 15 Mar 2022 02:31:13 +0000 (19:31 -0700)
1. Reduce the partition size from 50MB to 25MB. This increases the libraries
PMI collection from 2 partitions to 3, and the coreclr_tests collection to
25 partitions, which should allow better parallelism and tighter timeouts.
2. Reduce the work item timeout from 5 hours to 3 or 2 hours, based on
Kusto data on maximum observed work item durations.
3. Exclude the native msquic.dll.
4. Fix an issue in first_fit that ignored files too large for the "maximum
size": we want to include everything, even files larger than max_size
(see the sketch below).
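
For illustration, a minimal sketch of the fixed bucketing behavior
(simplified from first_fit in superpmi_collect_setup.py; the bucket-size
bookkeeping here is illustrative, not the script's exact code):

    def first_fit(sorted_by_size, max_size):
        """Partition (file_name, size) pairs, sorted by size descending,
        into buckets whose totals stay under max_size where possible."""
        partitions = {}  # bucket index -> list of (file_name, size)
        for curr_file in sorted_by_size:
            file_size = curr_file[1]
            found_bucket = False
            if file_size < max_size:
                for p_index in partitions:
                    bucket_size = sum(size for _, size in partitions[p_index])
                    if bucket_size + file_size < max_size:
                        partitions[p_index].append(curr_file)
                        found_bucket = True
                        break
            # The fix: a file that fits nowhere, including one at or above
            # max_size, opens a new bucket instead of being silently dropped.
            if not found_bucket:
                partitions[len(partitions)] = [curr_file]
        return partitions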

eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml
src/coreclr/scripts/superpmi-collect.proj
src/coreclr/scripts/superpmi_collect_setup.py

diff --git a/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml
index be05b52..5ea9e3f 100644
@@ -106,7 +106,7 @@ jobs:
     steps:
     - ${{ parameters.steps }}
   
-    - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_collect_setup.py -source_directory $(Build.SourcesDirectory) -core_root_directory $(Core_Root_Dir) -arch $(archType) -platform $(osGroup) -mch_file_tag $(MchFileTag) -input_directory $(InputDirectory) -collection_name $(CollectionName) -collection_type $(CollectionType) -max_size 50 # size in MB
+    - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_collect_setup.py -source_directory $(Build.SourcesDirectory) -core_root_directory $(Core_Root_Dir) -arch $(archType) -platform $(osGroup) -mch_file_tag $(MchFileTag) -input_directory $(InputDirectory) -collection_name $(CollectionName) -collection_type $(CollectionType) -max_size 25 # size in MB
       displayName: ${{ format('SuperPMI setup ({0})', parameters.osGroup) }}
 
     # Create required directories for merged mch collection and superpmi logs
diff --git a/src/coreclr/scripts/superpmi-collect.proj b/src/coreclr/scripts/superpmi-collect.proj
index 8d59a82..f338245 100644
     <WorkItemCommand>$(Python) $(WorkItemCommand) -assemblies $(PmiAssembliesDirectory) -arch $(Architecture) -build_type $(BuildConfig) -core_root $(SuperPMIDirectory)</WorkItemCommand>
   </PropertyGroup>
 
+  <PropertyGroup Condition="'$(CollectionName)' != 'benchmarks'">
+    <WorkItemTimeout>2:00</WorkItemTimeout>
+  </PropertyGroup>
+
   <PropertyGroup Condition="'$(CollectionName)' == 'benchmarks'">
     <WorkItemCommand>$(Python) $(SuperPMIDirectory)/superpmi_benchmarks.py -performance_directory $(PerformanceDirectory) -superpmi_directory $(SuperPMIDirectory) -core_root $(SuperPMIDirectory) -arch $(Architecture)</WorkItemCommand>
+    <WorkItemTimeout>3:00</WorkItemTimeout>
   </PropertyGroup>
 
   <PropertyGroup>
     <EnableAzurePipelinesReporter>false</EnableAzurePipelinesReporter>
     <EnableXUnitReporter>false</EnableXUnitReporter>
-    <WorkItemTimeout>5:00</WorkItemTimeout>
   </PropertyGroup>
 
   <ItemGroup Condition=" '$(AGENT_OS)' == 'Windows_NT' ">
diff --git a/src/coreclr/scripts/superpmi_collect_setup.py b/src/coreclr/scripts/superpmi_collect_setup.py
index 9eedadb..ce6eee4 100644
@@ -155,6 +155,7 @@ native_binaries_to_ignore = [
     "mscordbi.dll",
     "mscorrc.dll",
     "msdia140.dll",
+    "msquic.dll",
     "R2RDump.exe",
     "R2RTest.exe",
     "superpmi.exe",
@@ -276,7 +277,8 @@ def get_files_sorted_by_size(src_directory, exclude_directories, exclude_files):
 
 def first_fit(sorted_by_size, max_size):
     """ Given a list of file names along with size in descending order, divides the files
-    in number of buckets such that each bucket doesn't exceed max_size. Since this is a first-fit
+    in number of buckets such that each bucket doesn't exceed max_size (unless a single file exceeds
+    max_size, in which case it gets its own bucket). Since this is a first-fit
    approach, it doesn't guarantee to find the bucket with the tightest spot available.
 
     Args:
@@ -301,8 +303,8 @@ def first_fit(sorted_by_size, max_size):
                     found_bucket = True
                     break
 
-            if not found_bucket:
-                partitions[len(partitions)] = [curr_file]
+        if not found_bucket:
+            partitions[len(partitions)] = [curr_file]
 
     total_size = 0
     for p_index in partitions:
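
With the dedented check, a file larger than max_size now gets its own
partition instead of being dropped. A hypothetical call, assuming
(file_name, size-in-MB) tuples sorted descending as the docstring
describes:

    files = [("big.dll", 30), ("a.dll", 12), ("b.dll", 10), ("c.dll", 8)]
    first_fit(files, max_size=25)
    # -> {0: [("big.dll", 30)],
    #     1: [("a.dll", 12), ("b.dll", 10)],
    #     2: [("c.dll", 8)]}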