Create superpmi-asmdiffs pipeline (#61194)
authorBruce Forstall <brucefo@microsoft.com>
Wed, 10 Nov 2021 21:28:49 +0000 (13:28 -0800)
committerGitHub <noreply@github.com>
Wed, 10 Nov 2021 21:28:49 +0000 (13:28 -0800)
Create a new `runtime-coreclr superpmi-asmdiffs` pipeline that runs SuperPMI asmdiffs for every change in the JIT directory.

The diffs are run on two platforms: Windows x64 and Windows x86. Asm diffs for Linux, Arm64, and Arm32 are produced using cross-compilers, as follows:

| Platform | Asm diffs |
| -- | -- |
| Windows x64 | win-x64, win-arm64, linux-x64, linux-arm64 |
| Windows x86 | win-x86, linux-arm |

The resulting summary .md files are uploaded to the pipeline artifacts, one .md file per platform (so, one for the Windows x64 runs and one for the Windows x86 runs). The results are also displayed on the "Extensions" page of the AzDO pipeline.

The runs take about 50 minutes to complete (assuming not much waiting for machines).

The asm diffs pipeline is similar to the "superpmi-replay" pipeline, except:
1. It determines an appropriate baseline JIT based on the PR commit and where it merges with the `main` branch (see the sketch after this list). Given this, it downloads the matching baseline JITs from the JIT rolling build artifacts in Azure Storage.
2. It clones the `jitutils` repo and builds the `jit-analyze` tool, needed to generate the summary .md file.
3. It downloads a "portable" `git` installation and adds it to the Helix machine payload, since `jit-analyze` uses `git diff` to analyze the generated .dasm files of the diff.
4. It collects the various summary .md files into one per platform on which the runs are done, and publishes that to the pipeline artifacts and the `Extensions` page.
5. It does only one replay (asmdiffs) run, not one run for each of a set of stress modes.
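
For reference, the baseline search in item 1 boils down to a few `git` queries (the real logic lives in `process_git_hash_arg` in `jitrollingbuild.py`, added below). This is only an illustrative sketch run from the repo root, not the code the pipeline executes:

```python
# Sketch of the baseline search in item 1; assumes the current directory is the repo root.
import subprocess

def git_output(args):
    # Run a git command and return its stripped stdout.
    return subprocess.run(["git"] + args, check=True, capture_output=True, text=True).stdout.strip()

current_git_hash = git_output(["rev-parse", "HEAD"])
# The commit where this change meets main anchors the baseline.
base_git_hash = git_output(["merge-base", current_git_hash, "origin/main"])
# The last 20 commits, starting at the baseline, that touched the JIT sources;
# the JIT rolling build only produces artifacts for these.
candidate_hashes = git_output(
    ["log", "--pretty=format:%H", base_git_hash, "-20", "--", "src/coreclr/jit/*"]).splitlines()
# The first candidate with a matching rolling build in Azure Storage becomes the baseline JIT.
```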

As part of this implementation:
a. `azdo_pipelines_util.py` was renamed to `jitutil.py`, and many utility functions from `superpmi.py` were moved into it, mostly to share the code for downloading and uncompressing .zip files. (There is a slight change to the output of the `superpmi.py download` command as a result.) A number of simple, more general helpers were also moved over for possible future sharing (see the import sketch after this list).
b. `jitrollingbuild.py download` can now be run with no arguments to download a baseline JIT (from the JIT rolling build Azure Storage location) for the current enlistment to the default location. Previously, it required a specific `-git_hash` and target directory. Similar, but not identical, logic exists in `superpmi.py`.
c. The `superpmi.py --no_progress` option was made global and applied in a few more places. This was necessary because `superpmi.py asmdiffs` downloads a coredistools binary from the JIT Azure Storage if one isn't found in the Core_Root folder.
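
To illustrate item (a), callers now import the shared helpers from `jitutil` rather than `azdo_pipelines_util`. A minimal sketch (the URL below is only a placeholder, not a real artifact location):

```python
# Minimal sketch of using the shared jitutil helpers; the URL is a placeholder.
from jitutil import TempDir, download_files, run_command

with TempDir() as temp_dir:
    # download_files fetches URLs (or UNC paths) and unzips any .zip payloads
    # into the target directory.
    download_files(["https://clrjit2.blob.core.windows.net/example/clrjit.zip"], temp_dir)
    stdout, stderr, return_code = run_command(["git", "--version"], _cwd=temp_dir)
```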

Fixes https://github.com/dotnet/runtime/issues/59445

21 files changed:
eng/pipelines/coreclr/superpmi-asmdiffs.yml [new file with mode: 0644]
eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-job.yml [new file with mode: 0644]
eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml
eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml
eng/pipelines/coreclr/templates/superpmi-asmdiffs-job.yml [new file with mode: 0644]
src/coreclr/scripts/antigen_run.py
src/coreclr/scripts/azdo_pipelines_util.py [deleted file]
src/coreclr/scripts/fuzzer_setup.py
src/coreclr/scripts/fuzzlyn_run.py
src/coreclr/scripts/jitrollingbuild.py
src/coreclr/scripts/jitutil.py [new file with mode: 0644]
src/coreclr/scripts/superpmi-asmdiffs.proj [new file with mode: 0644]
src/coreclr/scripts/superpmi.py
src/coreclr/scripts/superpmi_asmdiffs.py [new file with mode: 0644]
src/coreclr/scripts/superpmi_asmdiffs_setup.py [new file with mode: 0644]
src/coreclr/scripts/superpmi_asmdiffs_summarize.py [new file with mode: 0644]
src/coreclr/scripts/superpmi_aspnet.py
src/coreclr/scripts/superpmi_benchmarks.py
src/coreclr/scripts/superpmi_collect_setup.py
src/coreclr/scripts/superpmi_replay.py
src/coreclr/scripts/superpmi_replay_setup.py

diff --git a/eng/pipelines/coreclr/superpmi-asmdiffs.yml b/eng/pipelines/coreclr/superpmi-asmdiffs.yml
new file mode 100644 (file)
index 0000000..bd41ac0
--- /dev/null
@@ -0,0 +1,29 @@
+pr:
+  branches:
+    include:
+    - main
+  paths:
+    include:
+    - src/coreclr/jit/*
+
+jobs:
+
+- template: /eng/pipelines/common/platform-matrix.yml
+  parameters:
+    jobTemplate: /eng/pipelines/coreclr/templates/build-jit-job.yml
+    buildConfig: checked
+    platforms:
+    - windows_x64
+    - windows_x86
+    jobParameters:
+      uploadAs: 'pipelineArtifacts'
+
+- template: /eng/pipelines/common/platform-matrix.yml
+  parameters:
+    jobTemplate: /eng/pipelines/coreclr/templates/superpmi-asmdiffs-job.yml
+    buildConfig: checked
+    platforms:
+    - windows_x64
+    - windows_x86
+    helixQueueGroup: ci
+    helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
\ No newline at end of file
diff --git a/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-job.yml
new file mode 100644 (file)
index 0000000..46535d2
--- /dev/null
@@ -0,0 +1,137 @@
+parameters:
+  steps: []                       # optional -- any additional steps that need to happen before pulling down the jitutils repo and sending the jitutils to helix (i.e., building your repo)
+  variables: []                   # optional -- list of additional variables to send to the template
+  jobName: ''                     # required -- job name
+  displayName: ''                 # optional -- display name for the job. Will use jobName if not passed
+  pool: ''                        # required -- name of the Build pool
+  container: ''                   # required -- name of the container
+  buildConfig: ''                 # required -- build configuration
+  archType: ''                    # required -- targeting CPU architecture
+  osGroup: ''                     # required -- operating system for the job
+  osSubgroup: ''                  # optional -- operating system subgroup
+  continueOnError: 'false'        # optional -- determines whether to continue the build if the step errors
+  dependsOn: ''                   # optional -- dependencies of the job
+  timeoutInMinutes: 120           # optional -- timeout for the job
+  enableTelemetry: false          # optional -- enable for telemetry
+  liveLibrariesBuildConfig: ''    # optional -- live-live libraries configuration to use for the run
+  helixQueues: ''                 # required -- Helix queues
+  dependOnEvaluatePaths: false
+
+jobs:
+- template: xplat-pipeline-job.yml
+  parameters:
+    dependsOn: ${{ parameters.dependsOn }}
+    buildConfig: ${{ parameters.buildConfig }}
+    archType: ${{ parameters.archType }}
+    osGroup: ${{ parameters.osGroup }}
+    osSubgroup: ${{ parameters.osSubgroup }}
+    liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
+    enableTelemetry: ${{ parameters.enableTelemetry }}
+    enablePublishBuildArtifacts: true
+    continueOnError: ${{ parameters.continueOnError }}
+    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
+    timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
+
+    ${{ if ne(parameters.displayName, '') }}:
+      displayName: '${{ parameters.displayName }}'
+    ${{ if eq(parameters.displayName, '') }}:
+      displayName: '${{ parameters.jobName }}'
+
+    variables:
+
+    - ${{ each variable in parameters.variables }}:
+      - ${{ if ne(variable.name, '') }}:
+        - name: ${{ variable.name }}
+          value: ${{ variable.value }}
+      - ${{ if ne(variable.group, '') }}:
+        - group: ${{ variable.group }}
+
+    - name: PythonScript
+      value: 'py -3'
+    - name: PipScript
+      value: 'py -3 -m pip'
+    - name: SpmiCollectionLocation
+      value: '$(Build.SourcesDirectory)\artifacts\spmi\'
+    - name: SpmiLogsLocation
+      value: '$(Build.SourcesDirectory)\artifacts\spmi_logs\'
+    - name: SpmiAsmdiffsLocation
+      value: '$(Build.SourcesDirectory)\artifacts\spmi_asmdiffs\'
+    - name: HelixResultLocation
+      value: '$(Build.SourcesDirectory)\artifacts\helixresults\'
+
+    workspace:
+      clean: all
+    pool:
+      ${{ parameters.pool }}
+    container: ${{ parameters.container }}
+    steps:
+    - ${{ parameters.steps }}
+
+    - script: |
+        mkdir $(SpmiCollectionLocation)
+        mkdir $(SpmiLogsLocation)
+        mkdir $(SpmiAsmdiffsLocation)
+      displayName: Create directories
+
+    - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_asmdiffs_setup.py -source_directory $(Build.SourcesDirectory) -product_directory $(buildProductRootFolderPath) -arch $(archType)
+      displayName: ${{ format('SuperPMI asmdiffs setup ({0})', parameters.archType) }}
+
+      # Run superpmi asmdiffs in helix
+    - template: /eng/pipelines/common/templates/runtimes/send-to-helix-step.yml
+      parameters:
+        displayName: 'Send job to Helix'
+        helixBuild: $(Build.BuildNumber)
+        helixSource: $(_HelixSource)
+        helixType: 'build/tests/'
+        helixQueues: ${{ join(',', parameters.helixQueues) }}
+        creator: dotnet-bot
+        WorkItemTimeout: 2:00 # 2 hours
+        WorkItemDirectory: '$(WorkItemDirectory)'
+        CorrelationPayloadDirectory: '$(CorrelationPayloadDirectory)'
+        helixProjectArguments: '$(Build.SourcesDirectory)/src/coreclr/scripts/superpmi-asmdiffs.proj'
+        BuildConfig: ${{ parameters.buildConfig }}
+        osGroup: ${{ parameters.osGroup }}
+        archType: ${{ parameters.archType }}
+        shouldContinueOnError: true # Run the subsequent steps, i.e., upload the superpmi logs
+
+      # Always upload the available logs for diagnostics
+    - task: CopyFiles@2
+      displayName: Copying superpmi.log of all partitions
+      inputs:
+        sourceFolder: '$(HelixResultLocation)'
+        contents: '**/superpmi_*.log'
+        targetFolder: '$(SpmiLogsLocation)'
+      condition: always()
+
+    - task: CopyFiles@2
+      displayName: Copying superpmi.md of all partitions
+      inputs:
+        sourceFolder: '$(HelixResultLocation)'
+        contents: '**/superpmi_*.md'
+        targetFolder: '$(SpmiAsmdiffsLocation)'
+      condition: always()
+
+    - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_asmdiffs_summarize.py -diff_summary_dir $(SpmiAsmdiffsLocation) -arch $(archType)
+      displayName: ${{ format('Summarize ({0})', parameters.archType) }}
+      condition: always()
+
+    - task: PublishPipelineArtifact@1
+      displayName: Publish SuperPMI logs
+      inputs:
+        targetPath: $(SpmiLogsLocation)
+        artifactName: 'SuperPMI_Logs_$(archType)_$(buildConfig)'
+      condition: always()
+
+    - task: PublishPipelineArtifact@1
+      displayName: Publish SuperPMI asmdiffs files
+      inputs:
+        targetPath: $(SpmiAsmdiffsLocation)
+        artifactName: 'SuperPMI_Asmdiffs_$(archType)_$(buildConfig)'
+      condition: always()
+
+    - task: PublishPipelineArtifact@1
+      displayName: Publish SuperPMI build logs
+      inputs:
+        targetPath: $(Build.SourcesDirectory)/artifacts/log
+        artifactName: 'SuperPMI_BuildLogs_$(archType)_$(buildConfig)'
+      condition: always()
index 01726c6..1ac235f 100644 (file)
@@ -114,7 +114,7 @@ jobs:
       - script: |
           mkdir -p $(MergedMchFileLocation)
           mkdir -p $(SpmiLogsLocation)
-        displayName: Create directory for merged collection
+        displayName: Create directories
     - ${{ if eq(parameters.osGroup, 'windows') }}:
       - script: |
           mkdir $(MergedMchFileLocation)
index 696b2e5..9b64c38 100644 (file)
@@ -66,11 +66,12 @@ jobs:
     - ${{ parameters.steps }}
 
     - script: |
-        mkdir -p $(SpmiCollectionLocation)
-      displayName: Create directory for SPMI collection
+        mkdir $(SpmiCollectionLocation)
+        mkdir $(SpmiLogsLocation)
+      displayName: Create directories
 
     - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_replay_setup.py -source_directory $(Build.SourcesDirectory) -product_directory $(buildProductRootFolderPath) -arch $(archType)
-      displayName: ${{ format('SuperPMI replay setup ({0} {1})', parameters.osGroup, parameters.archType) }}
+      displayName: ${{ format('SuperPMI replay setup ({0})', parameters.archType) }}
 
       # Run superpmi replay in helix
     - template: /eng/pipelines/common/templates/runtimes/send-to-helix-step.yml
diff --git a/eng/pipelines/coreclr/templates/superpmi-asmdiffs-job.yml b/eng/pipelines/coreclr/templates/superpmi-asmdiffs-job.yml
new file mode 100644 (file)
index 0000000..bb05902
--- /dev/null
@@ -0,0 +1,39 @@
+parameters:
+  buildConfig: ''                 # required -- build configuration
+  archType: ''                    # required -- targeting CPU architecture
+  osGroup: ''                     # required -- operating system for the job
+  osSubgroup: ''                  # optional -- operating system subgroup
+  pool: ''
+  timeoutInMinutes: 120           # build timeout
+  variables: {}
+  helixQueues: ''
+  dependOnEvaluatePaths: false
+  runJobTemplate: '/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-job.yml'
+
+jobs:
+- template: ${{ parameters.runJobTemplate }}
+  parameters:
+    jobName: ${{ format('superpmi_asmdiffs_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
+    displayName: ${{ format('SuperPMI asmdiffs {0}{1} {2} {3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
+    pool: ${{ parameters.pool }}
+    buildConfig: ${{ parameters.buildConfig }}
+    archType: ${{ parameters.archType }}
+    osGroup: ${{ parameters.osGroup }}
+    osSubgroup: ${{ parameters.osSubgroup }}
+    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
+    timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
+    helixQueues: ${{ parameters.helixQueues }}
+    dependsOn:
+     - ${{ format('coreclr_jit_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
+
+    variables: ${{ parameters.variables }}
+
+    steps:
+
+    # Download jit builds
+    - template: /eng/pipelines/common/download-artifact-step.yml
+      parameters:
+        unpackFolder: $(buildProductRootFolderPath)
+        artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
+        artifactName: '$(buildProductArtifactName)'
+        displayName: 'JIT product build'
\ No newline at end of file
index 9614600..240e135 100644 (file)
@@ -20,7 +20,7 @@ from os import path, walk
 from os.path import getsize
 import os
 from coreclr_arguments import *
-from azdo_pipelines_util import run_command, TempDir
+from jitutil import run_command, TempDir
 
 parser = argparse.ArgumentParser(description="description")
 
diff --git a/src/coreclr/scripts/azdo_pipelines_util.py b/src/coreclr/scripts/azdo_pipelines_util.py
deleted file mode 100644 (file)
index 83f1d08..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/env python3
-#
-# Licensed to the .NET Foundation under one or more agreements.
-# The .NET Foundation licenses this file to you under the MIT license.
-#
-# Title               : azdo_pipelines_util.py
-#
-# Notes:
-#
-# Utility functions used by Python scripts involved with Azure DevOps Pipelines
-# setup.
-#
-################################################################################
-################################################################################
-
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-
-
-def run_command(command_to_run, _cwd=None, _exit_on_fail=False, _output_file=None):
-    """ Runs the command.
-
-    Args:
-        command_to_run ([string]): Command to run along with arguments.
-        _cwd (string): Current working directory.
-        _exit_on_fail (bool): If it should exit on failure.
-    Returns:
-        (string, string, int): Returns a tuple of stdout, stderr, and command return code if _output_file= None
-        Otherwise stdout, stderr are empty.
-    """
-    print("Running: " + " ".join(command_to_run))
-    command_stdout = ""
-    command_stderr = ""
-    return_code = 1
-
-    output_type = subprocess.STDOUT if _output_file else subprocess.PIPE
-    with subprocess.Popen(command_to_run, stdout=subprocess.PIPE, stderr=output_type, cwd=_cwd) as proc:
-
-        # For long running command, continuously print the output
-        if _output_file:
-            while True:
-                with open(_output_file, 'a') as of:
-                    output = proc.stdout.readline()
-                    if proc.poll() is not None:
-                        break
-                    if output:
-                        output_str = output.strip().decode("utf-8")
-                        print(output_str)
-                        of.write(output_str + "\n")
-        else:
-            command_stdout, command_stderr = proc.communicate()
-            if len(command_stdout) > 0:
-                print(command_stdout.decode("utf-8"))
-            if len(command_stderr) > 0:
-                print(command_stderr.decode("utf-8"))
-
-        return_code = proc.returncode
-        if _exit_on_fail and return_code != 0:
-            print("Command failed. Exiting.")
-            sys.exit(1)
-    return command_stdout, command_stderr, return_code
-
-
-def copy_directory(src_path, dst_path, verbose_output=True, match_func=lambda path: True):
-    """Copies directory in 'src_path' to 'dst_path' maintaining the directory
-    structure. https://docs.python.org/3.5/library/shutil.html#shutil.copytree can't
-    be used in this case because it expects the destination directory should not
-    exist, however we do call copy_directory() to copy files to same destination directory.
-
-    Args:
-        src_path (string): Path of source directory that need to be copied.
-        dst_path (string): Path where directory should be copied.
-        verbose_output (bool): True to print every copy or skipped file.
-        match_func (str -> bool) : Criteria function determining if a file is copied.
-    """
-    if not os.path.exists(dst_path):
-        os.makedirs(dst_path)
-    for item in os.listdir(src_path):
-        src_item = os.path.join(src_path, item)
-        dst_item = os.path.join(dst_path, item)
-        if os.path.isdir(src_item):
-            copy_directory(src_item, dst_item, verbose_output, match_func)
-        else:
-            try:
-                if match_func(src_item):
-                    if verbose_output:
-                        print("> copy {0} => {1}".format(src_item, dst_item))
-                    try:
-                        shutil.copy2(src_item, dst_item)
-                    except PermissionError as pe_error:
-                        print('Ignoring PermissionError: {0}'.format(pe_error))
-                else:
-                    if verbose_output:
-                        print("> skipping {0}".format(src_item))
-            except UnicodeEncodeError:
-                if verbose_output:
-                    print("> Got UnicodeEncodeError")
-
-
-def copy_files(src_path, dst_path, file_names):
-    """Copy files from 'file_names' list from 'src_path' to 'dst_path'.
-    It retains the original directory structure of src_path.
-
-    Args:
-        src_path (string): Source directory from where files are copied.
-        dst_path (string): Destination directory where files to be copied.
-        file_names ([string]): List of full path file names to be copied.
-    """
-
-    print('### Copying below files from {0} to {1}:'.format(src_path, dst_path))
-    print('')
-    print(os.linesep.join(file_names))
-    for f in file_names:
-        # Create same structure in dst so we don't clobber same files names present in different directories
-        dst_path_of_file = f.replace(src_path, dst_path)
-
-        dst_directory = os.path.dirname(dst_path_of_file)
-        if not os.path.exists(dst_directory):
-            os.makedirs(dst_directory)
-        try:
-            shutil.copy2(f, dst_path_of_file)
-        except PermissionError as pe_error:
-            print('Ignoring PermissionError: {0}'.format(pe_error))
-
-
-def set_pipeline_variable(name, value):
-    """ This method sets pipeline variable.
-
-    Args:
-        name (string): Name of the variable.
-        value (string): Value of the variable.
-    """
-    define_variable_format = "##vso[task.setvariable variable={0}]{1}"
-    print("{0} -> {1}".format(name, value))  # logging
-    print(define_variable_format.format(name, value))  # set variable
-
-
-class TempDir:
-    """ Class to create a temporary working directory, or use one that is passed as an argument.
-
-        Use with: "with TempDir() as temp_dir" to change to that directory and then automatically
-        change back to the original working directory afterwards and remove the temporary
-        directory and its contents (if skip_cleanup is False).
-    """
-
-    def __init__(self, path=None, skip_cleanup=False):
-        self.mydir = tempfile.mkdtemp() if path is None else path
-        self.cwd = None
-        self._skip_cleanup = skip_cleanup
-
-    def __enter__(self):
-        self.cwd = os.getcwd()
-        os.chdir(self.mydir)
-        return self.mydir
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        os.chdir(self.cwd)
-        if not self._skip_cleanup:
-            shutil.rmtree(self.mydir)
-
-
-class ChangeDir:
-    """ Class to temporarily change to a given directory. Use with "with".
-    """
-
-    def __init__(self, mydir):
-        self.mydir = mydir
-        self.cwd = None
-
-    def __enter__(self):
-        self.cwd = os.getcwd()
-        os.chdir(self.mydir)
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        os.chdir(self.cwd)
\ No newline at end of file
index 54ce2f3..7bc60ac 100644 (file)
@@ -18,7 +18,7 @@ import argparse
 import os
 from coreclr_arguments import *
 from os import path
-from azdo_pipelines_util import run_command, copy_directory, set_pipeline_variable, ChangeDir, TempDir
+from jitutil import run_command, copy_directory, set_pipeline_variable, ChangeDir, TempDir
 
 parser = argparse.ArgumentParser(description="description")
 
@@ -103,7 +103,7 @@ def main(main_args):
 
     # create exploratory directory
     print('Copying {} -> {}'.format(scripts_src_directory, coreroot_directory))
-    copy_directory(scripts_src_directory, coreroot_directory, match_func=lambda path: any(path.endswith(extension) for extension in [".py"]))
+    copy_directory(scripts_src_directory, coreroot_directory, verbose_output=True, match_func=lambda path: any(path.endswith(extension) for extension in [".py"]))
 
     if is_windows:
         acceptable_copy = lambda path: any(path.endswith(extension) for extension in [".py", ".dll", ".exe", ".json"])
@@ -113,7 +113,7 @@ def main(main_args):
 
     # copy CORE_ROOT
     print('Copying {} -> {}'.format(coreclr_args.core_root_directory, coreroot_directory))
-    copy_directory(coreclr_args.core_root_directory, coreroot_directory, match_func=acceptable_copy)
+    copy_directory(coreclr_args.core_root_directory, coreroot_directory, verbose_output=True, match_func=acceptable_copy)
 
     try:
         with TempDir() as tool_code_directory:
@@ -136,7 +136,7 @@ def main(main_args):
 
             # copy tool
             print('Copying {} -> {}'.format(publish_dir, dst_directory))
-            copy_directory(publish_dir, dst_directory, match_func=acceptable_copy)
+            copy_directory(publish_dir, dst_directory, verbose_output=True, match_func=acceptable_copy)
     except PermissionError as pe:
         print("Skipping file. Got error: %s", pe)
 
index 7d99b1f..fa37b6e 100644 (file)
@@ -20,7 +20,7 @@ import os
 import re
 import shutil
 import threading
-from azdo_pipelines_util import run_command, TempDir
+from jitutil import run_command, TempDir
 from coreclr_arguments import *
 from os import path
 
index 0003805..d120fa4 100644 (file)
@@ -32,10 +32,10 @@ locale.setlocale(locale.LC_ALL, '')  # Use '' for auto, or force e.g. to 'en_US.
 ################################################################################
 
 az_account_name = "clrjit2"
-az_container_name = "jitrollingbuild"
+az_jitrollingbuild_container_name = "jitrollingbuild"
 az_builds_root_folder = "builds"
 az_blob_storage_account_uri = "https://" + az_account_name + ".blob.core.windows.net/"
-az_blob_storage_container_uri = az_blob_storage_account_uri + az_container_name
+az_blob_storage_jitrollingbuild_container_uri = az_blob_storage_account_uri + az_jitrollingbuild_container_name
 
 ################################################################################
 # Argument Parser
@@ -50,19 +50,28 @@ Upload clrjit to SuperPMI Azure storage.
 """
 
 download_description = """\
-Download clrjit from SuperPMI Azure storage.
+Download clrjit from SuperPMI Azure storage. If -git_hash is given, download exactly
+that JIT. If -git_hash is not given, find the latest git hash from the main branch that
+corresponds to the current tree, and download that JIT. That is, find an appropriate
+"baseline" JIT for doing asm diffs.
 """
 
 list_description = """\
 List clrjit in SuperPMI Azure storage.
 """
 
-host_os_help = "OS (windows, OSX, Linux). Default: current OS."
-
 arch_help = "Architecture (x64, x86, arm, arm64). Default: current architecture."
 
 build_type_help = "Build type (Debug, Checked, Release). Default: Checked."
 
+host_os_help = "OS (windows, OSX, Linux). Default: current OS."
+
+spmi_location_help = """\
+Directory in which to put SuperPMI files, such as downloaded MCH files, asm diffs, and repro .MC files.
+Optional. Default is 'spmi' within the repo 'artifacts' directory.
+If 'SUPERPMI_CACHE_DIRECTORY' environment variable is set to a path, it will use that directory.
+"""
+
 git_hash_help = "git hash"
 
 target_dir_help = "Directory to put the downloaded JIT."
@@ -82,6 +91,7 @@ common_parser = argparse.ArgumentParser(add_help=False)
 common_parser.add_argument("-arch", help=arch_help)
 common_parser.add_argument("-build_type", default="Checked", help=build_type_help)
 common_parser.add_argument("-host_os", help=host_os_help)
+common_parser.add_argument("-spmi_location", help=spmi_location_help)
 
 # subparser for upload
 upload_parser = subparsers.add_parser("upload", description=upload_description, parents=[common_parser])
@@ -93,8 +103,8 @@ upload_parser.add_argument("--skip_cleanup", action="store_true", help=skip_clea
 # subparser for download
 download_parser = subparsers.add_parser("download", description=download_description, parents=[common_parser])
 
-download_parser.add_argument("-git_hash", required=True, help=git_hash_help)
-download_parser.add_argument("-target_dir", required=True, help=target_dir_help)
+download_parser.add_argument("-git_hash", help=git_hash_help)
+download_parser.add_argument("-target_dir", help=target_dir_help)
 download_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help)
 
 # subparser for list
@@ -172,6 +182,111 @@ def determine_jit_name(coreclr_args):
         raise RuntimeError("Unknown OS.")
 
 
+def process_git_hash_arg(coreclr_args):
+    """ Process the -git_hash argument.
+
+        If the argument is present, use that to download a JIT.
+        If not present, try to find and download a JIT based on the current environment:
+        1. Determine the current directory git hash using:
+             git rev-parse HEAD
+           Call the result `current_git_hash`.
+        2. Determine the baseline: where does this hash meet `main` using:
+             git merge-base `current_git_hash` main
+           Call the result `base_git_hash`.
+        3. Figure out the latest hash, starting with `base_git_hash`, that contains any changes to
+           the src/coreclr/jit directory. (We do this because the JIT rolling build only includes
+           builds for changes to this directory. So, this logic needs to stay in sync with the logic
+           that determines what causes the JIT rolling build to run. E.g., it should also get
+           rebuilt if the JIT-EE interface GUID changes. Alternatively, we can take the entire list
+           of changes, and probe the rolling build drop for all of them.)
+        4. Starting with `base_git_hash`, and possibly walking to older changes, look for matching builds
+           in the JIT rolling build drops.
+        5. If a JIT directory in Azure Storage is found, set coreclr_args.git_hash to that git hash to use
+           for downloading.
+
+    Args:
+        coreclr_args (CoreclrArguments) : parsed args
+
+    Returns:
+        Nothing
+
+        coreclr_args.git_hash is set to the git hash to use
+
+        An exception is thrown if the `-git_hash` argument is unspecified, and we don't find an appropriate
+        JIT to download.
+    """
+
+    if coreclr_args.git_hash is not None:
+        return
+
+    # Do all the remaining commands, including a number of 'git' commands including relative paths,
+    # from the root of the runtime repo.
+
+    with ChangeDir(coreclr_args.runtime_repo_location):
+        command = [ "git", "rev-parse", "HEAD" ]
+        print("Invoking: {}".format(" ".join(command)))
+        proc = subprocess.Popen(command, stdout=subprocess.PIPE)
+        stdout_git_rev_parse, _ = proc.communicate()
+        return_code = proc.returncode
+        if return_code == 0:
+            current_git_hash = stdout_git_rev_parse.decode('utf-8').strip()
+            print("Current hash: {}".format(current_git_hash))
+        else:
+            raise RuntimeError("Couldn't determine current git hash")
+
+        # We've got the current hash; figure out the baseline hash.
+        command = [ "git", "merge-base", current_git_hash, "origin/main" ]
+        print("Invoking: {}".format(" ".join(command)))
+        proc = subprocess.Popen(command, stdout=subprocess.PIPE)
+        stdout_git_merge_base, _ = proc.communicate()
+        return_code = proc.returncode
+        if return_code == 0:
+            base_git_hash = stdout_git_merge_base.decode('utf-8').strip()
+            print("Baseline hash: {}".format(base_git_hash))
+        else:
+            raise RuntimeError("Couldn't determine baseline git hash")
+
+        # Enumerate the last 20 changes, starting with the baseline, that included JIT changes.
+        command = [ "git", "log", "--pretty=format:%H", base_git_hash, "-20", "--", "src/coreclr/jit/*" ]
+        print("Invoking: {}".format(" ".join(command)))
+        proc = subprocess.Popen(command, stdout=subprocess.PIPE)
+        stdout_change_list, _ = proc.communicate()
+        return_code = proc.returncode
+        change_list_hashes = []
+        if return_code == 0:
+            change_list_hashes = stdout_change_list.decode('utf-8').strip().splitlines()
+        else:
+            raise RuntimeError("Couldn't determine list of JIT changes starting with baseline hash")
+
+        if len(change_list_hashes) == 0:
+            raise RuntimeError("No JIT changes found starting with baseline hash")
+
+        # For each hash, see if the rolling build contains the JIT.
+
+        hashnum = 1
+        for git_hash in change_list_hashes:
+            print("try {}: {}".format(hashnum, git_hash))
+
+            # Set the git hash to look for
+            # Note: there's a slight inefficiency here because this code searches for a JIT at this hash value, and
+            # then when we go to download, we do the same search again because we don't cache the result and pass it
+            # directly on to the downloader.
+            coreclr_args.git_hash = git_hash
+            urls = get_jit_urls(coreclr_args, find_all=False)
+            if len(urls) > 1:
+                if hashnum > 1:
+                    print("Warning: the baseline found is not built with the first git hash with JIT code changes; there may be extraneous diffs")
+                return
+
+            # We didn't find a baseline; keep looking
+            hashnum += 1
+
+        # We ran out of hashes of JIT changes, and didn't find a baseline. Give up.
+        print("Error: no baseline JIT found")
+
+    raise RuntimeError("No baseline JIT found")
+
+
 def list_az_jits(filter_func=lambda unused: True, prefix_string = None):
     """ List the JITs in Azure Storage using REST api
 
@@ -198,7 +313,7 @@ def list_az_jits(filter_func=lambda unused: True, prefix_string = None):
 
     urls = []
 
-    list_az_container_uri_root = az_blob_storage_container_uri + "?restype=container&comp=list&prefix=" + az_builds_root_folder + "/"
+    list_az_container_uri_root = az_blob_storage_jitrollingbuild_container_uri + "?restype=container&comp=list&prefix=" + az_builds_root_folder + "/"
     if prefix_string:
         list_az_container_uri_root += prefix_string
 
@@ -268,7 +383,7 @@ def upload_command(coreclr_args):
     print("JIT upload")
 
     def upload_blob(file, blob_name):
-        blob_client = blob_service_client.get_blob_client(container=az_container_name, blob=blob_name)
+        blob_client = blob_service_client.get_blob_client(container=az_jitrollingbuild_container_name, blob=blob_name)
 
         # Check if the blob already exists, and delete it if it does, before uploading / replacing it.
         try:
@@ -382,14 +497,14 @@ def upload_command(coreclr_args):
                 total_bytes_uploaded += zip_stat_result.st_size
 
                 blob_name = "{}/{}".format(blob_folder_name, zip_name)
-                print("Uploading: {} ({}) -> {}".format(file, zip_path, az_blob_storage_container_uri + "/" + blob_name))
+                print("Uploading: {} ({}) -> {}".format(file, zip_path, az_blob_storage_jitrollingbuild_container_uri + "/" + blob_name))
                 upload_blob(zip_path, blob_name)
             else:
                 file_stat_result = os.stat(file)
                 total_bytes_uploaded += file_stat_result.st_size
                 file_name = os.path.basename(file)
                 blob_name = "{}/{}".format(blob_folder_name, file_name)
-                print("Uploading: {} -> {}".format(file, az_blob_storage_container_uri + "/" + blob_name))
+                print("Uploading: {} -> {}".format(file, az_blob_storage_jitrollingbuild_container_uri + "/" + blob_name))
                 upload_blob(file, blob_name)
 
     print("Uploaded {:n} bytes".format(total_bytes_uploaded))
@@ -466,7 +581,7 @@ def get_jit_urls(coreclr_args, find_all=False):
     """
 
     blob_filter_string = "{}/{}/{}/{}".format(coreclr_args.git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type)
-    blob_prefix_filter = "{}/{}/{}".format(az_blob_storage_container_uri, az_builds_root_folder, blob_filter_string).lower()
+    blob_prefix_filter = "{}/{}/{}".format(az_blob_storage_jitrollingbuild_container_uri, az_builds_root_folder, blob_filter_string).lower()
 
     # Determine if a URL in Azure Storage should be allowed. The URL looks like:
     #   https://clrjit.blob.core.windows.net/jitrollingbuild/builds/git_hash/Linux/x64/Checked/clrjit.dll
@@ -480,17 +595,27 @@ def get_jit_urls(coreclr_args, find_all=False):
 
 
 def download_command(coreclr_args):
-    """ Download the JIT
+    """ Download the JITs
 
     Args:
         coreclr_args (CoreclrArguments): parsed args
     """
 
     urls = get_jit_urls(coreclr_args, find_all=False)
-    if urls is None:
+    if len(urls) == 0:
+        print("Nothing to download")
         return
 
-    download_urls(urls, coreclr_args.target_dir)
+    if coreclr_args.target_dir is None:
+        # Use the same default download location for the JIT as superpmi.py uses for downloading a baseline JIT.
+        default_basejit_root_dir = os.path.join(coreclr_args.spmi_location, "basejit")
+        target_dir = os.path.join(default_basejit_root_dir, "{}.{}.{}.{}".format(coreclr_args.git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type))
+        if not os.path.isdir(target_dir):
+            os.makedirs(target_dir)
+    else:
+        target_dir = coreclr_args.target_dir
+
+    download_urls(urls, target_dir)
 
 
 def list_command(coreclr_args):
@@ -501,7 +626,8 @@ def list_command(coreclr_args):
     """
 
     urls = get_jit_urls(coreclr_args, find_all=coreclr_args.all)
-    if urls is None:
+    if len(urls) == 0:
+        print("No JITs found")
         return
 
     count = len(urls)
@@ -536,6 +662,21 @@ def setup_args(args):
                         lambda unused: True,
                         "Unable to set mode")
 
+    def setup_spmi_location_arg(spmi_location):
+        if spmi_location is None:
+            if "SUPERPMI_CACHE_DIRECTORY" in os.environ:
+                spmi_location = os.environ["SUPERPMI_CACHE_DIRECTORY"]
+                spmi_location = os.path.abspath(spmi_location)
+            else:
+                spmi_location = os.path.abspath(os.path.join(coreclr_args.artifacts_location, "spmi"))
+        return spmi_location
+
+    coreclr_args.verify(args,
+                        "spmi_location",
+                        lambda unused: True,
+                        "Unable to set spmi_location",
+                        modify_arg=setup_spmi_location_arg)
+
     if coreclr_args.mode == "upload":
 
         coreclr_args.verify(args,
@@ -575,10 +716,12 @@ def setup_args(args):
                             lambda unused: True,
                             "Unable to set skip_cleanup")
 
-        if not os.path.isdir(coreclr_args.target_dir):
+        if coreclr_args.target_dir is not None and not os.path.isdir(coreclr_args.target_dir):
             print("--target_dir directory does not exist")
             raise RuntimeError("Error")
 
+        process_git_hash_arg(coreclr_args)
+
     elif coreclr_args.mode == "list":
 
         coreclr_args.verify(args,
diff --git a/src/coreclr/scripts/jitutil.py b/src/coreclr/scripts/jitutil.py
new file mode 100644 (file)
index 0000000..5bff7e3
--- /dev/null
@@ -0,0 +1,666 @@
+#!/usr/bin/env python3
+#
+# Licensed to the .NET Foundation under one or more agreements.
+# The .NET Foundation licenses this file to you under the MIT license.
+#
+# Title               : jitutil.py
+#
+# Notes:
+#
+# Utility functions used by Python scripts used by the CLR JIT team.
+#
+################################################################################
+################################################################################
+
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import logging
+import urllib
+import urllib.request
+import zipfile
+
+################################################################################
+##
+## Helper classes
+##
+################################################################################
+
+class TempDir:
+    """ Class to create a temporary working directory, or use one that is passed as an argument.
+
+        Use with: "with TempDir() as temp_dir" to change to that directory and then automatically
+        change back to the original working directory afterwards and remove the temporary
+        directory and its contents (if skip_cleanup is False).
+    """
+
+    def __init__(self, path=None, skip_cleanup=False):
+        self.mydir = tempfile.mkdtemp() if path is None else path
+        self.cwd = None
+        self._skip_cleanup = skip_cleanup
+
+    def __enter__(self):
+        self.cwd = os.getcwd()
+        os.chdir(self.mydir)
+        return self.mydir
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        os.chdir(self.cwd)
+        if not self._skip_cleanup:
+            shutil.rmtree(self.mydir)
+
+class ChangeDir:
+    """ Class to temporarily change to a given directory. Use with "with".
+    """
+
+    def __init__(self, mydir):
+        self.mydir = mydir
+        self.cwd = None
+
+    def __enter__(self):
+        self.cwd = os.getcwd()
+        os.chdir(self.mydir)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        os.chdir(self.cwd)
+
+
+################################################################################
+##
+## Azure DevOps pipelines helper functions
+##
+################################################################################
+
+def set_pipeline_variable(name, value):
+    """ This method sets pipeline variable.
+
+    Args:
+        name (string): Name of the variable.
+        value (string): Value of the variable.
+    """
+    define_variable_format = "##vso[task.setvariable variable={0}]{1}"
+    print("{0} -> {1}".format(name, value))  # logging
+    print(define_variable_format.format(name, value))  # set variable
+
+
+################################################################################
+##
+## Helper functions
+##
+################################################################################
+
+def run_command(command_to_run, _cwd=None, _exit_on_fail=False, _output_file=None):
+    """ Runs the command.
+
+    Args:
+        command_to_run ([string]): Command to run along with arguments.
+        _cwd (string): Current working directory.
+        _exit_on_fail (bool): If it should exit on failure.
+        _output_file (string): If given, a file to which the command output is appended while the command runs.
+    Returns:
+        (string, string, int): Returns a tuple of stdout, stderr, and command return code if _output_file= None
+        Otherwise stdout, stderr are empty.
+    """
+    print("Running: " + " ".join(command_to_run))
+    command_stdout = ""
+    command_stderr = ""
+    return_code = 1
+
+    output_type = subprocess.STDOUT if _output_file else subprocess.PIPE
+    with subprocess.Popen(command_to_run, stdout=subprocess.PIPE, stderr=output_type, cwd=_cwd) as proc:
+
+        # For long running command, continuously print the output
+        if _output_file:
+            while True:
+                with open(_output_file, 'a') as of:
+                    output = proc.stdout.readline()
+                    if proc.poll() is not None:
+                        break
+                    if output:
+                        output_str = output.strip().decode("utf-8")
+                        print(output_str)
+                        of.write(output_str + "\n")
+        else:
+            command_stdout, command_stderr = proc.communicate()
+            if len(command_stdout) > 0:
+                print(command_stdout.decode("utf-8"))
+            if len(command_stderr) > 0:
+                print(command_stderr.decode("utf-8"))
+
+        return_code = proc.returncode
+        if _exit_on_fail and return_code != 0:
+            print("Command failed. Exiting.")
+            sys.exit(1)
+    return command_stdout, command_stderr, return_code
+
+
+def copy_directory(src_path, dst_path, verbose_output=False, verbose_copy=False, verbose_skip=False, match_func=lambda path: True):
+    """Copies directory in 'src_path' to 'dst_path' maintaining the directory
+    structure. https://docs.python.org/3.5/library/shutil.html#shutil.copytree can't
+    be used in this case because it expects the destination directory should not
+    exist, however we do call copy_directory() to copy files to same destination directory.
+
+    Args:
+        src_path (string): Path of source directory that need to be copied.
+        dst_path (string): Path where directory should be copied.
+        verbose_output (bool): True to print every copied or skipped file or error.
+        verbose_copy (bool): True to print every copied file
+        verbose_skip (bool): True to print every skipped file.
+        match_func (str -> bool) : Criteria function determining if a file is copied.
+    """
+    display_copy = verbose_output or verbose_copy
+    display_skip = verbose_output or verbose_skip
+    for item in os.listdir(src_path):
+        src_item = os.path.join(src_path, item)
+        dst_item = os.path.join(dst_path, item)
+        if os.path.isdir(src_item):
+            copy_directory(src_item, dst_item, verbose_output, verbose_copy, verbose_skip, match_func)
+        else:
+            try:
+                if match_func(src_item):
+                    if display_copy:
+                        print("> copy {0} => {1}".format(src_item, dst_item))
+                    try:
+                        if not os.path.exists(dst_path):
+                            os.makedirs(dst_path)
+                        shutil.copy2(src_item, dst_item)
+                    except PermissionError as pe_error:
+                        print('Ignoring PermissionError: {0}'.format(pe_error))
+                else:
+                    if display_skip:
+                        print("> skipping {0}".format(src_item))
+            except UnicodeEncodeError:
+                # Should this always be displayed? Or is it too verbose somehow?
+                if verbose_output:
+                    print("> Got UnicodeEncodeError")
+
+
+def copy_files(src_path, dst_path, file_names):
+    """Copy files from 'file_names' list from 'src_path' to 'dst_path'.
+    It retains the original directory structure of src_path.
+
+    Args:
+        src_path (string): Source directory from where files are copied.
+        dst_path (string): Destination directory where files to be copied.
+        file_names ([string]): List of full path file names to be copied.
+    """
+
+    print('### Copying below files from {0} to {1}:'.format(src_path, dst_path))
+    print('')
+    print(os.linesep.join(file_names))
+    for f in file_names:
+        # Create same structure in dst so we don't clobber same files names present in different directories
+        dst_path_of_file = f.replace(src_path, dst_path)
+
+        dst_directory = os.path.dirname(dst_path_of_file)
+        if not os.path.exists(dst_directory):
+            os.makedirs(dst_directory)
+        try:
+            shutil.copy2(f, dst_path_of_file)
+        except PermissionError as pe_error:
+            print('Ignoring PermissionError: {0}'.format(pe_error))
+
+
+def remove_prefix(text, prefix):
+    """ Helper function to remove a prefix `prefix` from a string `text`
+    """
+    if text.startswith(prefix):
+        return text[len(prefix):]
+    return text
+
+
+def is_zero_length_file(fpath):
+    """ Determine if a file system path refers to an existing file that is zero length
+
+    Args:
+        fpath (str) : file system path to test
+
+    Returns:
+        bool : true if the path is an existing file that is zero length
+    """
+    return os.path.isfile(fpath) and os.stat(fpath).st_size == 0
+
+
+def is_nonzero_length_file(fpath):
+    """ Determine if a file system path refers to an existing file that is non-zero length
+
+    Args:
+        fpath (str) : file system path to test
+
+    Returns:
+        bool : true if the path is an existing file that is non-zero length
+    """
+    return os.path.isfile(fpath) and os.stat(fpath).st_size != 0
+
+
+def make_safe_filename(s):
+    """ Turn a string into a string usable as a single file name component; replace illegal characters with underscores.
+
+    Args:
+        s (str) : string to convert to a file name
+
+    Returns:
+        (str) : The converted string
+    """
+    def safe_char(c):
+        if c.isalnum():
+            return c
+        else:
+            return "_"
+    return "".join(safe_char(c) for c in s)
+
+
+def find_in_path(name, pathlist, match_func=os.path.isfile):
+    """ Find a name (e.g., directory name or file name) in the file system by searching the directories
+        in a `pathlist` (e.g., PATH environment variable that has been semi-colon
+        split into a list).
+
+    Args:
+        name (str)               : name to search for
+        pathlist (list)          : list of directory names to search
+        match_func (str -> bool) : determines if the name is a match
+
+    Returns:
+        (str) The pathname of the object, or None if not found.
+    """
+    for dirname in pathlist:
+        candidate = os.path.join(dirname, name)
+        if match_func(candidate):
+            return candidate
+    return None
+
+
+def find_file(filename, pathlist):
+    """ Find a filename in the file system by searching the directories
+        in a `pathlist` (e.g., PATH environment variable that has been semi-colon
+        split into a list).
+
+    Args:
+        filename (str)          : name to search for
+        pathlist (list)         : list of directory names to search
+
+    Returns:
+        (str) The pathname of the object, or None if not found.
+    """
+    return find_in_path(filename, pathlist)
+
+
+def find_dir(dirname, pathlist):
+    """ Find a directory name in the file system by searching the directories
+        in a `pathlist` (e.g., PATH environment variable that has been semi-colon
+        split into a list).
+
+    Args:
+        dirname (str)           : name to search for
+        pathlist (list)         : list of directory names to search
+
+    Returns:
+        (str) The pathname of the object, or None if not found.
+    """
+    return find_in_path(dirname, pathlist, match_func=os.path.isdir)
+
+
+def create_unique_directory_name(root_directory, base_name):
+    """ Create a unique directory name by joining `root_directory` and `base_name`.
+        If this name already exists, append ".1", ".2", ".3", etc., to the final
+        name component until the full directory name is not found.
+
+    Args:
+        root_directory (str)     : root directory in which a new directory will be created
+        base_name (str)          : the base name of the new directory name component to be added
+
+    Returns:
+        (str) The full absolute path of the new directory. The directory has been created.
+    """
+    root_directory = os.path.abspath(root_directory)
+    full_path = os.path.join(root_directory, base_name)
+
+    count = 1
+    while os.path.isdir(full_path):
+        new_full_path = os.path.join(root_directory, base_name + "." + str(count))
+        count += 1
+        full_path = new_full_path
+
+    os.makedirs(full_path)
+    return full_path
+
+
+def create_unique_file_name(directory, base_name, extension):
+    """ Create a unique file name in the specified directory by joining `base_name` and `extension`.
+        If this name already exists, append ".1", ".2", ".3", etc., to the `base_name`
+        name component until the full file name is not found.
+
+    Args:
+        directory (str)  : directory in which a new file will be created
+        base_name (str)  : the base name of the new filename to be added
+        extension (str)  : the filename extension of the new filename to be added
+
+    Returns:
+        (str) The full absolute path of the new filename.
+    """
+    directory = os.path.abspath(directory)
+    if not os.path.isdir(directory):
+        try:
+            os.makedirs(directory)
+        except Exception as exception:
+            logging.critical(exception)
+            raise exception
+
+    full_path = os.path.join(directory, base_name + "." + extension)
+
+    count = 1
+    while os.path.isfile(full_path):
+        new_full_path = os.path.join(directory, base_name + "." + str(count) + "." + extension)
+        count += 1
+        full_path = new_full_path
+
+    return full_path
+
+
+def get_files_from_path(path, match_func=lambda path: True):
+    """ Return all files in a directory tree matching a criteria.
+
+    Args:
+        path (str)               : Either a single file to include, or a directory to traverse looking for matching
+                                   files.
+        match_func (str -> bool) : Criteria function determining if a file is added to the list
+
+    Returns:
+        Array of absolute paths of matching files
+    """
+    if not(os.path.isdir(path) or os.path.isfile(path)):
+        logging.warning("Warning: \"%s\" is not a file or directory", path)
+        return []
+
+    path = os.path.abspath(path)
+
+    files = []
+
+    if os.path.isdir(path):
+        for item in os.listdir(path):
+            files += get_files_from_path(os.path.join(path, item), match_func)
+    else:
+        if match_func(path):
+            files.append(path)
+
+    return files
+
+
+def is_url(path):
+    """ Return True if this looks like a URL
+
+    Args:
+        path (str) : name to check
+
+    Returns:
+        True if it looks like a URL, False otherwise.
+    """
+    # Probably could use urllib.parse to be more precise.
+    # If it doesn't look like an URL, treat it like a file, possibly a UNC file.
+    return path.lower().startswith("http:") or path.lower().startswith("https:")
+
+################################################################################
+##
+## Azure Storage functions
+##
+################################################################################
+
+# Decide if we're going to download and enumerate Azure Storage using anonymous
+# read access and urllib functions (False), or Azure APIs including authentication (True).
+authenticate_using_azure = False
+
+# Have we checked whether we have the Azure Storage libraries yet?
+azure_storage_libraries_check = False
+
+
+def require_azure_storage_libraries(need_azure_storage_blob=True, need_azure_identity=True):
+    """ Check for and import the Azure libraries.
+        We do this lazily, only when we decide we're actually going to need them.
+        Once we've done it once, we don't do it again.
+    """
+    global azure_storage_libraries_check, BlobServiceClient, BlobClient, ContainerClient, AzureCliCredential
+
+    if azure_storage_libraries_check:
+        return
+
+    azure_storage_libraries_check = True
+
+    azure_storage_blob_import_ok = True
+    if need_azure_storage_blob:
+        try:
+            from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
+        except:
+            azure_storage_blob_import_ok = False
+
+    azure_identity_import_ok = True
+    if need_azure_identity:
+        try:
+            from azure.identity import AzureCliCredential
+        except:
+            azure_identity_import_ok = False
+
+    if not azure_storage_blob_import_ok or not azure_identity_import_ok:
+        logging.error("One or more required Azure Storage packages is missing.")
+        logging.error("")
+        logging.error("Please install:")
+        logging.error("  pip install azure-storage-blob azure-identity")
+        logging.error("or (Windows):")
+        logging.error("  py -3 -m pip install azure-storage-blob azure-identity")
+        logging.error("See also https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python")
+        raise RuntimeError("Missing Azure Storage package.")
+
+    # The Azure packages spam all kinds of output to the logging channels.
+    # Restrict this to only ERROR and CRITICAL.
+    for name in logging.Logger.manager.loggerDict.keys():
+        if 'azure' in name:
+            logging.getLogger(name).setLevel(logging.ERROR)
+
+
+def report_azure_error():
+    """ Report an Azure error
+    """
+    logging.error("A problem occurred accessing Azure. Are you properly authenticated using the Azure CLI?")
+    logging.error("Install the Azure CLI from https://docs.microsoft.com/en-us/cli/azure/install-azure-cli.")
+    logging.error("Then log in to Azure using `az login`.")
+
+
+def download_with_azure(uri, target_location, fail_if_not_found=True):
+    """ Do an URI download using Azure blob storage API. Compared to urlretrieve,
+        there is no progress hook. Maybe if converted to use the async APIs we
+        could have some kind of progress?
+
+    Args:
+        uri (string)              : URI to download
+        target_location (string)  : local path to put the downloaded object
+        fail_if_not_found (bool)  : if True, fail if a download fails due to file not found (HTTP error 404).
+                                    Otherwise, ignore the failure.
+
+    Returns True if successful, False on failure
+    """
+
+    require_azure_storage_libraries()
+
+    logging.info("Download: %s -> %s", uri, target_location)
+
+    ok = True
+    az_credential = AzureCliCredential()
+    blob = BlobClient.from_blob_url(uri, credential=az_credential)
+    with open(target_location, "wb") as my_blob:
+        try:
+            download_stream = blob.download_blob(retry_total=0)
+            try:
+                my_blob.write(download_stream.readall())
+            except Exception as ex1:
+                logging.error("Error writing data to %s", target_location)
+                report_azure_error()
+                ok = False
+        except Exception as ex2:
+            logging.error("Azure error downloading %s", uri)
+            report_azure_error()
+            ok = False
+
+    if not ok and fail_if_not_found:
+        raise RuntimeError("Azure failed to download")
+    return ok
+
+################################################################################
+##
+## File downloading functions
+##
+################################################################################
+
+
+def download_progress_hook(count, block_size, total_size):
+    """ A hook for urlretrieve to report download progress
+
+    Args:
+        count (int)               : current block index
+        block_size (int)          : size of a block
+        total_size (int)          : total size of a payload
+    """
+    sys.stdout.write("\rDownloading {0:.1f}/{1:.1f} MB...".format(min(count * block_size, total_size) / 1024 / 1024, total_size / 1024 / 1024))
+    sys.stdout.flush()
+
+
+def download_with_progress_urlretrieve(uri, target_location, fail_if_not_found=True, display_progress=True):
+    """ Do an URI download using urllib.request.urlretrieve with a progress hook.
+
+        Outputs messages using the `logging` package.
+
+    Args:
+        uri (string)              : URI to download
+        target_location (string)  : local path to put the downloaded object
+        fail_if_not_found (bool)  : if True, fail if a download fails due to file not found (HTTP error 404).
+                                    Otherwise, ignore the failure.
+        display_progress (bool)   : if True, display download progress (for URL downloads). Otherwise, do not.
+
+    Returns True if successful, False on failure
+    """
+    logging.info("Download: %s -> %s", uri, target_location)
+
+    ok = True
+    try:
+        progress_display_method = download_progress_hook if display_progress else None
+        urllib.request.urlretrieve(uri, target_location, reporthook=progress_display_method)
+    except urllib.error.HTTPError as httperror:
+        if (httperror.code == 404) and fail_if_not_found:
+            logging.error("HTTP 404 error")
+            raise httperror
+        ok = False
+
+    if display_progress:
+        sys.stdout.write("\n") # Add newline after progress hook
+
+    return ok
+
+
+def download_one_url(uri, target_location, fail_if_not_found=True, is_azure_storage=False, display_progress=True):
+    """ Do an URI download using urllib.request.urlretrieve or Azure Storage APIs.
+
+    Args:
+        uri (string)              : URI to download
+        target_location (string)  : local path to put the downloaded object
+        fail_if_not_found (bool)  : if True, fail if a download fails due to file not found (HTTP error 404).
+                                    Otherwise, ignore the failure.
+        display_progress (bool)   : if True, display download progress (for URL downloads). Otherwise, do not.
+
+    Returns True if successful, False on failure
+    """
+    if is_azure_storage and authenticate_using_azure:
+        return download_with_azure(uri, target_location, fail_if_not_found)
+    else:
+        return download_with_progress_urlretrieve(uri, target_location, fail_if_not_found, display_progress)
+
+
+def download_files(paths, target_dir, verbose=True, fail_if_not_found=True, is_azure_storage=False, display_progress=True):
+    """ Download a set of files, specified as URLs or paths (such as Windows UNC paths),
+        to a target directory. If a file is a .ZIP file, then uncompress the file and
+        copy all its contents to the target directory.
+
+    Args:
+        paths (list): the URLs and paths to download
+        target_dir (str): target directory where files are copied. It will be created if it doesn't already exist.
+        verbose (bool): if True, do verbose logging.
+        fail_if_not_found (bool): if True, fail if a download fails due to file not found (HTTP error 404).
+                                  Otherwise, ignore the failure.
+        is_azure_storage (bool): if True, treat any URL as an Azure Storage URL
+        display_progress (bool): if True, display download progress (for URL downloads). Otherwise, do not.
+
+    Returns:
+        list of full paths of local filenames of downloaded files in the target directory
+    """
+
+    if len(paths) == 0:
+        logging.warning("No files specified to download")
+        return None
+
+    if verbose:
+        logging.info("Downloading:")
+        for item_path in paths:
+            logging.info("  %s", item_path)
+
+    # Create the target directory now, if it doesn't already exist.
+    target_dir = os.path.abspath(target_dir)
+    if not os.path.isdir(target_dir):
+        os.makedirs(target_dir)
+
+    local_paths = []
+
+    # In case we'll need a temp directory for ZIP file processing, create it first.
+    with TempDir() as temp_location:
+        for item_path in paths:
+            is_item_url = is_url(item_path)
+            item_name = item_path.split("/")[-1] if is_item_url else os.path.basename(item_path)
+
+            if item_path.lower().endswith(".zip"):
+                # Delete everything in the temp_location (left over from previous iterations of this loop, i.e., previous URL downloads).
+                temp_location_items = [os.path.join(temp_location, item) for item in os.listdir(temp_location)]
+                for item in temp_location_items:
+                    if os.path.isdir(item):
+                        shutil.rmtree(item)
+                    else:
+                        os.remove(item)
+
+                download_path = os.path.join(temp_location, item_name)
+                if is_item_url:
+                    ok = download_one_url(item_path, download_path, fail_if_not_found=fail_if_not_found, is_azure_storage=is_azure_storage, display_progress=display_progress)
+                    if not ok:
+                        continue
+                else:
+                    if fail_if_not_found or os.path.isfile(item_path):
+                        if verbose:
+                            logging.info("Download: %s -> %s", item_path, download_path)
+                        shutil.copy2(item_path, download_path)
+
+                if verbose:
+                    logging.info("Uncompress %s", download_path)
+                with zipfile.ZipFile(download_path, "r") as file_handle:
+                    file_handle.extractall(temp_location)
+
+                # Copy everything that was extracted to the target directory.
+                copy_directory(temp_location, target_dir, verbose_copy=verbose, match_func=lambda path: not path.endswith(".zip"))
+
+                # The caller wants to know where all the files ended up, so compute that.
+                for dirpath, _, files in os.walk(temp_location, topdown=True):
+                    for file_name in files:
+                        if not file_name.endswith(".zip"):
+                            full_file_path = os.path.join(dirpath, file_name)
+                            target_path = full_file_path.replace(temp_location, target_dir)
+                            local_paths.append(target_path)
+            else:
+                # Not a zip file; download directly to the target directory
+                download_path = os.path.join(target_dir, item_name)
+                if is_item_url:
+                    ok = download_one_url(item_path, download_path, fail_if_not_found=fail_if_not_found, is_azure_storage=is_azure_storage, display_progress=display_progress)
+                    if not ok:
+                        continue
+                else:
+                    if fail_if_not_found or os.path.isfile(item_path):
+                        if verbose:
+                            logging.info("Download: %s -> %s", item_path, download_path)
+                        shutil.copy2(item_path, download_path)
+                local_paths.append(download_path)
+
+    return local_paths
diff --git a/src/coreclr/scripts/superpmi-asmdiffs.proj b/src/coreclr/scripts/superpmi-asmdiffs.proj
new file mode 100644 (file)
index 0000000..170b306
--- /dev/null
@@ -0,0 +1,73 @@
+<Project Sdk="Microsoft.DotNet.Helix.Sdk" DefaultTargets="Test">
+  <!--
+    This is useful for local testing to print the produced helix items
+    To use this when you are changing how items are produced, uncomment the target
+    and replace the Project item at the top of the file with this:
+    <Project DefaultTargets="printItems">
+
+    Once you've done that you can run this to see the results:
+    dotnet msbuild .\superpmi-asmdiffs.proj /v:n
+   -->
+
+   <!-- <PropertyGroup>
+     <HelixTargetQueues>Some_Queue</HelixTargetQueues>
+     <Platform>Windows</Platform>
+     <Architecture>x64</Architecture>
+   </PropertyGroup>
+
+  <Target Name="printItems">
+        <Message Text="@(HelixWorkItem -> 'name: %(HelixWorkItem.Identity)
+     dir: %(HelixWorkItem.PayloadDirectory)
+     pre: %(HelixWorkItem.PreCommands)
+     command: %(HelixWorkItem.Command)
+     post: %(HelixWorkItem.PostCommands)
+     timeout: %(HelixWorkItem.Timeout)  '"/>
+  </Target> -->
+
+  <PropertyGroup>
+    <Python>%HELIX_PYTHONPATH%</Python>
+    <ProductDirectory>%HELIX_CORRELATION_PAYLOAD%</ProductDirectory>
+    <SuperpmiLogsLocation>%HELIX_WORKITEM_UPLOAD_ROOT%</SuperpmiLogsLocation>
+    <!-- Workaround until https://github.com/dotnet/arcade/pull/6179 is available -->
+    <HelixResultsDestinationDir>$(BUILD_SOURCESDIRECTORY)\artifacts\helixresults</HelixResultsDestinationDir>
+    <WorkItemCommand>$(Python) $(ProductDirectory)\superpmi_asmdiffs.py -base_jit_directory $(ProductDirectory)\base -diff_jit_directory $(ProductDirectory)\diff -log_directory $(SuperpmiLogsLocation)</WorkItemCommand>
+    <WorkItemTimeout>1:00</WorkItemTimeout>
+  </PropertyGroup>
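+  <!-- For reference (illustrative expansion): for the win-x64 partition defined below, the per-partition
+       work item command resolves to roughly:
+       %HELIX_PYTHONPATH% %HELIX_CORRELATION_PAYLOAD%\superpmi_asmdiffs.py -base_jit_directory %HELIX_CORRELATION_PAYLOAD%\base -diff_jit_directory %HELIX_CORRELATION_PAYLOAD%\diff -log_directory %HELIX_WORKITEM_UPLOAD_ROOT% -arch x64 -platform windows -->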
+
+  <PropertyGroup>
+    <EnableAzurePipelinesReporter>false</EnableAzurePipelinesReporter>
+    <EnableXUnitReporter>false</EnableXUnitReporter>
+    <Creator>$(_Creator)</Creator>
+    <HelixAccessToken>$(_HelixAccessToken)</HelixAccessToken>
+    <HelixBuild>$(_HelixBuild)</HelixBuild>
+    <HelixSource>$(_HelixSource)</HelixSource>
+    <HelixTargetQueues>$(_HelixTargetQueues)</HelixTargetQueues>
+    <HelixType>$(_HelixType)</HelixType>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <HelixCorrelationPayload Include="$(CorrelationPayloadDirectory)">
+      <PayloadDirectory>%(Identity)</PayloadDirectory>
+    </HelixCorrelationPayload>
+  </ItemGroup>
+
+  <ItemGroup Condition="'$(Architecture)' == 'x64'">
+    <SPMI_Partition Include="win-x64" Platform="windows" Architecture="x64" />
+    <SPMI_Partition Include="win-arm64" Platform="windows" Architecture="arm64" />
+    <SPMI_Partition Include="unix-x64" Platform="Linux" Architecture="x64" />
+    <SPMI_Partition Include="unix-arm64" Platform="Linux" Architecture="arm64" />
+  </ItemGroup>
+
+  <ItemGroup Condition="'$(Architecture)' == 'x86'">
+    <SPMI_Partition Include="win-x86" Platform="windows" Architecture="x86" />
+    <SPMI_Partition Include="unix-arm" Platform="Linux" Architecture="arm" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <HelixWorkItem Include="@(SPMI_Partition)">
+      <Command>$(WorkItemCommand) -arch %(HelixWorkItem.Architecture) -platform %(HelixWorkItem.Platform)</Command>
+      <Timeout>$(WorkItemTimeout)</Timeout>
+      <DownloadFilesFromResults>superpmi_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture).log;superpmi_download_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture).log;superpmi_diff_summary_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture).md</DownloadFilesFromResults>
+    </HelixWorkItem>
+  </ItemGroup>
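+  <!-- For the win-x64 partition, for example, the result files downloaded from Helix would be
+       superpmi_windows_x64.log, superpmi_download_windows_x64.log, and superpmi_diff_summary_windows_x64.md. -->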
+</Project>
diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py
index 0c098ec..dacf3a8 100755 (executable)
@@ -36,13 +36,13 @@ import urllib.request
 import zipfile
 
 from coreclr_arguments import *
+from jitutil import TempDir, ChangeDir, remove_prefix, is_zero_length_file, is_nonzero_length_file, \
+    make_safe_filename, find_file, download_one_url, download_files, report_azure_error, \
+    require_azure_storage_libraries, authenticate_using_azure, \
+    create_unique_directory_name, create_unique_file_name, get_files_from_path
 
 locale.setlocale(locale.LC_ALL, '')  # Use '' for auto, or force e.g. to 'en_US.UTF-8'
 
-# Decide if we're going to download and enumerate Azure Storage using anonymous
-# read access and urllib functions (False), or Azure APIs including authentication (True).
-authenticate_using_azure = False
-
 ################################################################################
 # Azure Storage information
 ################################################################################
@@ -237,8 +237,9 @@ core_root_parser.add_argument("-core_root", help=core_root_help)
 core_root_parser.add_argument("-log_level", help=log_level_help)
 core_root_parser.add_argument("-log_file", help=log_file_help)
 core_root_parser.add_argument("-spmi_location", help=spmi_location_help)
+core_root_parser.add_argument("--no_progress", action="store_true", help=download_no_progress_help)
 
-# Create a set of arguments common to target specification. Used for replay, upload, upload-private, download, list-collections.
+# Create a set of arguments common to target specification. Used for collect, replay, asmdiffs, upload, upload-private, download, list-collections.
 
 target_parser = argparse.ArgumentParser(add_help=False)
 
@@ -342,7 +343,6 @@ download_parser.add_argument("-filter", nargs='+', help=filter_help)
 download_parser.add_argument("-jit_ee_version", help=jit_ee_version_help)
 download_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help)
 download_parser.add_argument("--force_download", action="store_true", help=force_download_help)
-download_parser.add_argument("--no_progress", action="store_true", help=download_no_progress_help)
 download_parser.add_argument("-mch_files", metavar="MCH_FILE", nargs='+', help=replay_mch_files_help)
 download_parser.add_argument("-private_store", action="append", help=private_store_help)
 
@@ -363,345 +363,6 @@ merge_mch_parser.add_argument("-pattern", required=True, help=merge_mch_pattern_
 ################################################################################
 # Helper functions
 ################################################################################
-
-def remove_prefix(text, prefix):
-    """ Helper method to remove a prefix `prefix` from a string `text`
-    """
-    if text.startswith(prefix):
-        return text[len(prefix):]
-    return text
-
-# Have we checked whether we have the Azure Storage libraries yet?
-azure_storage_libraries_check = False
-
-
-def require_azure_storage_libraries(need_azure_storage_blob=True, need_azure_identity=True):
-    """ Check for and import the Azure libraries.
-        We do this lazily, only when we decide we're actually going to need them.
-        Once we've done it once, we don't do it again.
-    """
-    global azure_storage_libraries_check, BlobServiceClient, BlobClient, ContainerClient, AzureCliCredential
-
-    if azure_storage_libraries_check:
-        return
-
-    azure_storage_libraries_check = True
-
-    azure_storage_blob_import_ok = True
-    if need_azure_storage_blob:
-        try:
-            from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
-        except:
-            azure_storage_blob_import_ok = False
-
-    azure_identity_import_ok = True
-    if need_azure_identity:
-        try:
-            from azure.identity import AzureCliCredential
-        except:
-            azure_identity_import_ok = False
-
-    if not azure_storage_blob_import_ok or not azure_identity_import_ok:
-        logging.error("One or more required Azure Storage packages is missing.")
-        logging.error("")
-        logging.error("Please install:")
-        logging.error("  pip install azure-storage-blob azure-identity")
-        logging.error("or (Windows):")
-        logging.error("  py -3 -m pip install azure-storage-blob azure-identity")
-        logging.error("See also https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python")
-        raise RuntimeError("Missing Azure Storage package.")
-
-    # The Azure packages spam all kinds of output to the logging channels.
-    # Restrict this to only ERROR and CRITICAL.
-    for name in logging.Logger.manager.loggerDict.keys():
-        if 'azure' in name:
-            logging.getLogger(name).setLevel(logging.ERROR)
-
-
-def download_progress_hook(count, block_size, total_size):
-    """ A hook for urlretrieve to report download progress
-
-    Args:
-        count (int)               : current block index
-        block_size (int)          : size of a block
-        total_size (int)          : total size of a payload
-    """
-    sys.stdout.write("\rDownloading {0:.1f}/{1:.1f} MB...".format(min(count * block_size, total_size) / 1024 / 1024, total_size / 1024 / 1024))
-    sys.stdout.flush()
-
-
-def download_with_progress_urlretrieve(uri, target_location, fail_if_not_found=True, display_progress=True):
-    """ Do an URI download using urllib.request.urlretrieve with a progress hook.
-
-    Args:
-        uri (string)              : URI to download
-        target_location (string)  : local path to put the downloaded object
-        fail_if_not_found (bool)  : if True, fail if a download fails due to file not found (HTTP error 404).
-                                    Otherwise, ignore the failure.
-
-    Returns True if successful, False on failure
-    """
-    logging.info("Download: %s -> %s", uri, target_location)
-
-    ok = True
-    try:
-        progress_display_method = download_progress_hook if display_progress else None
-        urllib.request.urlretrieve(uri, target_location, reporthook=progress_display_method)
-    except urllib.error.HTTPError as httperror:
-        if (httperror == 404) and fail_if_not_found:
-            logging.error("HTTP 404 error")
-            raise httperror
-        ok = False
-
-    sys.stdout.write("\n") # Add newline after progress hook
-    return ok
-
-
-def report_azure_error():
-    """ Report an Azure error
-    """
-    logging.error("A problem occurred accessing Azure. Are you properly authenticated using the Azure CLI?")
-    logging.error("Install the Azure CLI from https://docs.microsoft.com/en-us/cli/azure/install-azure-cli.")
-    logging.error("Then log in to Azure using `az login`.")
-
-
-def download_with_azure(uri, target_location, fail_if_not_found=True):
-    """ Do an URI download using Azure blob storage API. Compared to urlretrieve,
-        there is no progress hook. Maybe if converted to use the async APIs we
-        could have some kind of progress?
-
-    Args:
-        uri (string)              : URI to download
-        target_location (string)  : local path to put the downloaded object
-        fail_if_not_found (bool)  : if True, fail if a download fails due to file not found (HTTP error 404).
-                                    Otherwise, ignore the failure.
-
-    Returns True if successful, False on failure
-    """
-
-    require_azure_storage_libraries()
-
-    logging.info("Download: %s -> %s", uri, target_location)
-
-    ok = True
-    az_credential = AzureCliCredential()
-    blob = BlobClient.from_blob_url(uri, credential=az_credential)
-    with open(target_location, "wb") as my_blob:
-        try:
-            download_stream = blob.download_blob(retry_total=0)
-            try:
-                my_blob.write(download_stream.readall())
-            except Exception as ex1:
-                logging.error("Error writing data to %s", target_location)
-                report_azure_error()
-                ok = False
-        except Exception as ex2:
-            logging.error("Azure error downloading %s", uri)
-            report_azure_error()
-            ok = False
-
-    if not ok and fail_if_not_found:
-        raise RuntimeError("Azure failed to download")
-    return ok
-
-
-def download_one_url(uri, target_location, fail_if_not_found=True, display_progress=True):
-    """ Do an URI download using urllib.request.urlretrieve or Azure Storage APIs.
-
-    Args:
-        uri (string)              : URI to download
-        target_location (string)  : local path to put the downloaded object
-        fail_if_not_found (bool)  : if True, fail if a download fails due to file not found (HTTP error 404).
-                                    Otherwise, ignore the failure.
-
-    Returns True if successful, False on failure
-    """
-    if authenticate_using_azure:
-        return download_with_azure(uri, target_location, fail_if_not_found)
-    else:
-        return download_with_progress_urlretrieve(uri, target_location, fail_if_not_found, display_progress)
-
-
-def is_zero_length_file(fpath):
-    """ Determine if a file system path refers to an existing file that is zero length
-
-    Args:
-        fpath (str) : file system path to test
-
-    Returns:
-        bool : true if the path is an existing file that is zero length
-    """
-    return os.path.isfile(fpath) and os.stat(fpath).st_size == 0
-
-
-def is_nonzero_length_file(fpath):
-    """ Determine if a file system path refers to an existing file that is non-zero length
-
-    Args:
-        fpath (str) : file system path to test
-
-    Returns:
-        bool : true if the path is an existing file that is non-zero length
-    """
-    return os.path.isfile(fpath) and os.stat(fpath).st_size != 0
-
-
-def make_safe_filename(s):
-    """ Turn a string into a string usable as a single file name component; replace illegal characters with underscores.
-
-    Args:
-        s (str) : string to convert to a file name
-
-    Returns:
-        (str) : The converted string
-    """
-    def safe_char(c):
-        if c.isalnum():
-            return c
-        else:
-            return "_"
-    return "".join(safe_char(c) for c in s)
-
-
-def find_in_path(name, pathlist, match_func=os.path.isfile):
-    """ Find a name (e.g., directory name or file name) in the file system by searching the directories
-        in a `pathlist` (e.g., PATH environment variable that has been semi-colon
-        split into a list).
-
-    Args:
-        name (str)               : name to search for
-        pathlist (list)          : list of directory names to search
-        match_func (str -> bool) : determines if the name is a match
-
-    Returns:
-        (str) The pathname of the object, or None if not found.
-    """
-    for dirname in pathlist:
-        candidate = os.path.join(dirname, name)
-        if match_func(candidate):
-            return candidate
-    return None
-
-
-def find_file(filename, pathlist):
-    """ Find a filename in the file system by searching the directories
-        in a `pathlist` (e.g., PATH environment variable that has been semi-colon
-        split into a list).
-
-    Args:
-        filename (str)          : name to search for
-        pathlist (list)         : list of directory names to search
-
-    Returns:
-        (str) The pathname of the object, or None if not found.
-    """
-    return find_in_path(filename, pathlist)
-
-
-def find_dir(dirname, pathlist):
-    """ Find a directory name in the file system by searching the directories
-        in a `pathlist` (e.g., PATH environment variable that has been semi-colon
-        split into a list).
-
-    Args:
-        dirname (str)           : name to search for
-        pathlist (list)         : list of directory names to search
-
-    Returns:
-        (str) The pathname of the object, or None if not found.
-    """
-    return find_in_path(dirname, pathlist, match_func=os.path.isdir)
-
-
-def create_unique_directory_name(root_directory, base_name):
-    """ Create a unique directory name by joining `root_directory` and `base_name`.
-        If this name already exists, append ".1", ".2", ".3", etc., to the final
-        name component until the full directory name is not found.
-
-    Args:
-        root_directory (str)     : root directory in which a new directory will be created
-        base_name (str)          : the base name of the new directory name component to be added
-
-    Returns:
-        (str) The full absolute path of the new directory. The directory has been created.
-    """
-
-    root_directory = os.path.abspath(root_directory)
-    full_path = os.path.join(root_directory, base_name)
-
-    count = 1
-    while os.path.isdir(full_path):
-        new_full_path = os.path.join(root_directory, base_name + "." + str(count))
-        count += 1
-        full_path = new_full_path
-
-    os.makedirs(full_path)
-    return full_path
-
-
-def create_unique_file_name(directory, base_name, extension):
-    """ Create a unique file name in the specified directory by joining `base_name` and `extension`.
-        If this name already exists, append ".1", ".2", ".3", etc., to the `base_name`
-        name component until the full file name is not found.
-
-    Args:
-        directory (str)  : directory in which a new file will be created
-        base_name (str)  : the base name of the new filename to be added
-        extension (str)  : the filename extension of the new filename to be added
-
-    Returns:
-        (str) The full absolute path of the new filename.
-    """
-
-    directory = os.path.abspath(directory)
-    if not os.path.isdir(directory):
-        try:
-            os.makedirs(directory)
-        except Exception as exception:
-            logging.critical(exception)
-            raise exception
-
-    full_path = os.path.join(directory, base_name + "." + extension)
-
-    count = 1
-    while os.path.isfile(full_path):
-        new_full_path = os.path.join(directory, base_name + "." + str(count) + "." + extension)
-        count += 1
-        full_path = new_full_path
-
-    return full_path
-
-
-def get_files_from_path(path, match_func=lambda path: True):
-    """ Return all files in a directory tree matching a criteria.
-
-    Args:
-        path (str)               : Either a single file to include, or a directory to traverse looking for matching
-                                   files.
-        match_func (str -> bool) : Criteria function determining if a file is added to the list
-
-    Returns:
-        Array of absolute paths of matching files
-    """
-
-    if not(os.path.isdir(path) or os.path.isfile(path)):
-        logging.warning("Warning: \"%s\" is not a file or directory", path)
-        return []
-
-    path = os.path.abspath(path)
-
-    files = []
-
-    if os.path.isdir(path):
-        for item in os.listdir(path):
-            files += get_files_from_path(os.path.join(path, item), match_func)
-    else:
-        if match_func(path):
-            files.append(path)
-
-    return files
-
-
 def run_and_log(command, log_level=logging.DEBUG):
     """ Return a command and log its output to the debug logger
 
@@ -788,19 +449,6 @@ def create_artifacts_base_name(coreclr_args, mch_file):
     return artifacts_base_name
 
 
-def is_url(path):
-    """ Return True if this looks like a URL
-
-    Args:
-        path (str) : name to check
-
-    Returns:
-        True it it looks like an URL, False otherwise.
-    """
-    # Probably could use urllib.parse to be more precise.
-    # If it doesn't look like an URL, treat it like a file, possibly a UNC file.
-    return path.lower().startswith("http:") or path.lower().startswith("https:")
-
 def read_csv_metrics(path):
     """ Read a metrics summary file produced by superpmi, and return the single row containing the information as a dictionary.
 
@@ -822,47 +470,6 @@ def read_csv_metrics(path):
 # Helper classes
 ################################################################################
 
-
-class TempDir:
-    """ Class to create a temporary working directory, or use one that is passed as an argument.
-
-        Use with: "with TempDir() as temp_dir" to change to that directory and then automatically
-        change back to the original working directory afterwards and remove the temporary
-        directory and its contents (if skip_cleanup is False).
-    """
-
-    def __init__(self, path=None, skip_cleanup=False):
-        self.mydir = tempfile.mkdtemp() if path is None else path
-        self.cwd = None
-        self._skip_cleanup = skip_cleanup
-
-    def __enter__(self):
-        self.cwd = os.getcwd()
-        os.chdir(self.mydir)
-        return self.mydir
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        os.chdir(self.cwd)
-        if not self._skip_cleanup:
-            shutil.rmtree(self.mydir)
-
-
-class ChangeDir:
-    """ Class to temporarily change to a given directory. Use with "with".
-    """
-
-    def __init__(self, mydir):
-        self.mydir = mydir
-        self.cwd = None
-
-    def __enter__(self):
-        self.cwd = os.getcwd()
-        os.chdir(self.mydir)
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        os.chdir(self.cwd)
-
-
 class AsyncSubprocessHelper:
     """ Class to help with async multiprocessing tasks.
     """
@@ -2015,11 +1622,11 @@ class SuperPMIReplayAsmDiffs:
                     if current_text_diff is not None:
                         logging.info("Textual differences found in generated asm.")
 
-                        # Find jit-analyze.bat/sh on PATH, if it exists, then invoke it.
+                        # Find jit-analyze on PATH, if it exists, then invoke it.
                         ran_jit_analyze = False
                         path_var = os.environ.get("PATH")
                         if path_var is not None:
-                            jit_analyze_file = "jit-analyze.bat" if platform.system() == "Windows" else "jit-analyze.sh"
+                            jit_analyze_file = "jit-analyze.exe" if platform.system() == "Windows" else "jit-analyze"
                             jit_analyze_path = find_file(jit_analyze_file, path_var.split(os.pathsep))
                             if jit_analyze_path is not None:
                                 # It appears we have a built jit-analyze on the path, so try to run it.
@@ -2167,7 +1774,8 @@ def determine_coredis_tools(coreclr_args):
             logging.warning("Warning: Core_Root does not exist at \"%s\"; creating it now", coreclr_args.core_root)
             os.makedirs(coreclr_args.core_root)
         coredistools_uri = az_blob_storage_superpmi_container_uri + "/libcoredistools/{}-{}/{}".format(coreclr_args.host_os.lower(), coreclr_args.arch.lower(), coredistools_dll_name)
-        download_one_url(coredistools_uri, coredistools_location)
+        skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress
+        download_one_url(coredistools_uri, coredistools_location, is_azure_storage=True, display_progress=not skip_progress)
 
     assert os.path.isfile(coredistools_location)
     return coredistools_location
@@ -2203,7 +1811,8 @@ def determine_pmi_location(coreclr_args):
                 logging.info("Using PMI found at %s", pmi_location)
             else:
                 pmi_uri = az_blob_storage_superpmi_container_uri + "/pmi/pmi.dll"
-                download_one_url(pmi_uri, pmi_location)
+                skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress
+                download_one_url(pmi_uri, pmi_location, is_azure_storage=True, display_progress=not skip_progress)
 
     assert os.path.isfile(pmi_location)
     return pmi_location
@@ -2629,7 +2238,7 @@ def process_local_mch_files(coreclr_args, mch_files, mch_cache_dir):
     # Download all the urls at once, and add the local cache filenames to our accumulated list of local file names.
     skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress
     if len(urls) != 0:
-        local_mch_files += download_files(urls, mch_cache_dir, display_progress=not skip_progress)
+        local_mch_files += download_files(urls, mch_cache_dir, is_azure_storage=True, display_progress=not skip_progress)
 
     # Special case: walk the URLs list and for every ".mch" or ".mch.zip" file, check to see that either the associated ".mct" file is already
     # in the list, or add it to a new list to attempt to download (but don't fail the download if it doesn't exist).
@@ -2640,7 +2249,7 @@ def process_local_mch_files(coreclr_args, mch_files, mch_cache_dir):
             if mct_url not in urls:
                 mct_urls.append(mct_url)
     if len(mct_urls) != 0:
-        local_mch_files += download_files(mct_urls, mch_cache_dir, fail_if_not_found=False, display_progress=not skip_progress)
+        local_mch_files += download_files(mct_urls, mch_cache_dir, fail_if_not_found=False, is_azure_storage=True, display_progress=not skip_progress)
 
     # Even though we might have downloaded MCT files, only return the set of MCH files.
     local_mch_files = [file for file in local_mch_files if any(file.lower().endswith(extension) for extension in [".mch"])]
@@ -2736,94 +2345,7 @@ def download_mch_from_azure(coreclr_args, target_dir):
     urls = [blob_url_prefix + path for path in paths]
 
     skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress
-    return download_files(urls, target_dir, display_progress=not skip_progress)
-
-def download_files(paths, target_dir, verbose=True, fail_if_not_found=True, display_progress=True):
-    """ Download a set of files, specified as URLs or paths (such as Windows UNC paths),
-        to a target directory. If a file is a .ZIP file, then uncompress the file and
-        copy all its contents to the target directory.
-
-    Args:
-        paths (list): the URLs and paths to download
-        target_dir (str): target directory where files are copied.
-        verbse (bool): if True, do verbose logging.
-        fail_if_not_found (bool): if True, fail if a download fails due to file not found (HTTP error 404).
-                                  Otherwise, ignore the failure.
-
-    Returns:
-        list of full paths of local filenames of downloaded files in the target directory
-    """
-
-    if len(paths) == 0:
-        logging.warning("No files specified to download")
-        return None
-
-    if verbose:
-        logging.info("Downloading:")
-        for item_path in paths:
-            logging.info("  %s", item_path)
-
-    # Create the target directory now, if it doesn't already exist.
-    target_dir = os.path.abspath(target_dir)
-    if not os.path.isdir(target_dir):
-        os.makedirs(target_dir)
-
-    local_paths = []
-
-    # In case we'll need a temp directory for ZIP file processing, create it first.
-    with TempDir() as temp_location:
-        for item_path in paths:
-            is_item_url = is_url(item_path)
-            item_name = item_path.split("/")[-1] if is_item_url else os.path.basename(item_path)
-
-            if item_path.lower().endswith(".zip"):
-                # Delete everything in the temp_location (from previous iterations of this loop, so previous URL downloads).
-                temp_location_items = [os.path.join(temp_location, item) for item in os.listdir(temp_location)]
-                for item in temp_location_items:
-                    if os.path.isdir(item):
-                        shutil.rmtree(item)
-                    else:
-                        os.remove(item)
-
-                download_path = os.path.join(temp_location, item_name)
-                if is_item_url:
-                    ok = download_one_url(item_path, download_path, fail_if_not_found, display_progress)
-                    if not ok:
-                        continue
-                else:
-                    if fail_if_not_found or os.path.isfile(item_path):
-                        if verbose:
-                            logging.info("Download: %s -> %s", item_path, download_path)
-                        shutil.copy2(item_path, download_path)
-
-                if verbose:
-                    logging.info("Uncompress %s", download_path)
-                with zipfile.ZipFile(download_path, "r") as file_handle:
-                    file_handle.extractall(temp_location)
-
-                # Copy everything that was extracted to the target directory.
-                items = [ os.path.join(temp_location, item) for item in os.listdir(temp_location) if not item.endswith(".zip") ]
-                for item in items:
-                    target_path = os.path.join(target_dir, os.path.basename(item))
-                    if verbose:
-                        logging.info("Copy %s -> %s", item, target_path)
-                    shutil.copy2(item, target_dir)
-                    local_paths.append(target_path)
-            else:
-                # Not a zip file; download directory to target directory
-                download_path = os.path.join(target_dir, item_name)
-                if is_item_url:
-                    ok = download_one_url(item_path, download_path, fail_if_not_found, display_progress)
-                    if not ok:
-                        continue
-                else:
-                    if fail_if_not_found or os.path.isfile(item_path):
-                        if verbose:
-                            logging.info("Download: %s -> %s", item_path, download_path)
-                        shutil.copy2(item_path, download_path)
-                local_paths.append(download_path)
-
-    return local_paths
+    return download_files(urls, target_dir, is_azure_storage=True, display_progress=not skip_progress)
 
 
 def upload_mch(coreclr_args):
@@ -3218,7 +2740,7 @@ def process_base_jit_path_arg(coreclr_args):
             blob_folder_name = "{}/{}/{}/{}/{}/{}".format(az_builds_root_folder, git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type, jit_name)
             blob_uri = "{}/{}".format(az_blob_storage_jitrollingbuild_container_uri, blob_folder_name)
             urls = [ blob_uri ]
-            local_files = download_files(urls, basejit_dir, verbose=False, fail_if_not_found=False)
+            local_files = download_files(urls, basejit_dir, verbose=False, is_azure_storage=True, fail_if_not_found=False)
 
             if len(local_files) > 0:
                 if hashnum > 1:
@@ -3298,6 +2820,11 @@ def setup_args(args):
                         "Unable to set spmi_location",
                         modify_arg=setup_spmi_location_arg)
 
+    coreclr_args.verify(args,
+                        "no_progress",
+                        lambda unused: True,
+                        "Unable to set no_progress")
+
     # Finish setting up logging.
     # The spmi_location is the root directory where we put the log file.
     # Log everything to the log file and only the specified verbosity to the console logger.
@@ -3846,11 +3373,6 @@ def setup_args(args):
                             "Unable to set force_download")
 
         coreclr_args.verify(args,
-                            "no_progress",
-                            lambda unused: True,
-                            "Unable to set no_progress")
-
-        coreclr_args.verify(args,
                             "filter",
                             lambda unused: True,
                             "Unable to set filter.")
diff --git a/src/coreclr/scripts/superpmi_asmdiffs.py b/src/coreclr/scripts/superpmi_asmdiffs.py
new file mode 100644 (file)
index 0000000..aca3fe9
--- /dev/null
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+#
+# Licensed to the .NET Foundation under one or more agreements.
+# The .NET Foundation licenses this file to you under the MIT license.
+#
+# Title               : superpmi_asmdiffs.py
+#
+# Notes:
+#
+# Script to run "superpmi asmdiffs" for various collections on the Helix machines.
+#
+################################################################################
+################################################################################
+
+import argparse
+import os
+import shutil
+from coreclr_arguments import *
+from jitutil import run_command
+
+parser = argparse.ArgumentParser(description="description")
+
+parser.add_argument("-arch", help="Architecture")
+parser.add_argument("-platform", help="OS platform")
+parser.add_argument("-base_jit_directory", help="path to the directory containing base clrjit binaries")
+parser.add_argument("-diff_jit_directory", help="path to the directory containing diff clrjit binaries")
+parser.add_argument("-log_directory", help="path to the directory containing superpmi log files")
+
+def setup_args(args):
+    """ Setup the args for SuperPMI to use.
+
+    Args:
+        args (ArgParse): args parsed by arg parser
+
+    Returns:
+        args (CoreclrArguments)
+
+    """
+    coreclr_args = CoreclrArguments(args, require_built_core_root=False, require_built_product_dir=False,
+                                    require_built_test_dir=False, default_build_type="Checked")
+
+    coreclr_args.verify(args,
+                        "arch",
+                        lambda unused: True,
+                        "Unable to set arch")
+
+    coreclr_args.verify(args,
+                        "platform",
+                        lambda unused: True,
+                        "Unable to set platform")
+
+    coreclr_args.verify(args,
+                        "base_jit_directory",
+                        lambda jit_directory: os.path.isdir(jit_directory),
+                        "base_jit_directory doesn't exist")
+
+    coreclr_args.verify(args,
+                        "diff_jit_directory",
+                        lambda jit_directory: os.path.isdir(jit_directory),
+                        "diff_jit_directory doesn't exist")
+
+    coreclr_args.verify(args,
+                        "log_directory",
+                        lambda log_directory: True,
+                        "log_directory doesn't exist")
+
+    return coreclr_args
+
+
+def main(main_args):
+    """ Run superpmi asmdiffs process on the Helix machines.
+
+    See superpmi_asmdiffs_setup.py for how the directory structure is set up in the
+    correlation payload. This script lives in the root of that directory tree.
+
+    Args:
+        main_args ([type]): Arguments to the script
+    """
+
+    python_path = sys.executable
+    script_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+    coreclr_args = setup_args(main_args)
+
+    # It doesn't really matter where we put the downloaded SPMI artifacts.
+    # Here, they are put in <correlation_payload>/artifacts/spmi.
+    spmi_location = os.path.join(script_dir, "artifacts", "spmi")
+
+    log_directory = coreclr_args.log_directory
+    platform_name = coreclr_args.platform
+
+    # Find the built jit-analyze and put its directory on the PATH
+    jit_analyze_dir = os.path.join(script_dir, "jit-analyze")
+    if not os.path.isdir(jit_analyze_dir):
+        print("Error: jit-analyze not found in {} (continuing)".format(jit_analyze_dir))
+    else:
+        # Put the jit-analyze directory on the PATH so superpmi.py can find it.
+        print("Adding {} to PATH".format(jit_analyze_dir))
+        os.environ["PATH"] = jit_analyze_dir + os.pathsep + os.environ["PATH"]
+
+    # Find the portable `git` installation, and put `git.exe` on the PATH, for use by `jit-analyze`.
+    git_directory = os.path.join(script_dir, "git", "cmd")
+    git_exe_tool = os.path.join(git_directory, "git.exe")
+    if not os.path.isfile(git_exe_tool):
+        print("Error: `git` not found at {} (continuing)".format(git_exe_tool))
+    else:
+        # Put the git/cmd directory on the PATH so jit-analyze can find it.
+        print("Adding {} to PATH".format(git_directory))
+        os.environ["PATH"] = git_directory + os.pathsep + os.environ["PATH"]
+
+    # Figure out which JITs to use
+    os_name = "win" if platform_name.lower() == "windows" else "unix"
+    arch_name = coreclr_args.arch
+    host_arch_name = "x64" if arch_name.endswith("64") else "x86"
+    os_name = "universal" if arch_name.startswith("arm") else os_name
+    base_jit_path = os.path.join(coreclr_args.base_jit_directory, 'clrjit_{}_{}_{}.dll'.format(os_name, arch_name, host_arch_name))
+    diff_jit_path = os.path.join(coreclr_args.diff_jit_directory, 'clrjit_{}_{}_{}.dll'.format(os_name, arch_name, host_arch_name))
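+    # For example (illustrative): "-platform windows -arch x64" selects clrjit_win_x64_x64.dll,
+    # "-platform Linux -arch x64" selects clrjit_unix_x64_x64.dll, and "-platform Linux -arch arm64"
+    # selects clrjit_universal_arm64_x64.dll (arm/arm64 targets use the "universal" cross-targeting
+    # JIT hosted on x86/x64, respectively).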
+
+    # Core_Root is where the superpmi tools (superpmi.exe, mcs.exe) are expected to be found.
+    # We pass the full path of the JITs to use as arguments.
+    core_root_dir = script_dir
+
+    print("Running superpmi.py download to get MCH files")
+
+    log_file = os.path.join(log_directory, "superpmi_download_{}_{}.log".format(platform_name, arch_name))
+    run_command([
+        python_path,
+        os.path.join(script_dir, "superpmi.py"),
+        "download",
+        "--no_progress",
+        "-core_root", core_root_dir,
+        "-target_os", platform_name,
+        "-target_arch", arch_name,
+        "-spmi_location", spmi_location,
+        "-log_level", "debug",
+        "-log_file", log_file
+        ], _exit_on_fail=True)
+
+    print("Running superpmi.py asmdiffs")
+    log_file = os.path.join(log_directory, "superpmi_{}_{}.log".format(platform_name, arch_name))
+
+    overall_md_summary_file = os.path.join(spmi_location, "diff_summary.md")
+    if os.path.isfile(overall_md_summary_file):
+        os.remove(overall_md_summary_file)
+
+    _, _, return_code = run_command([
+        python_path,
+        os.path.join(script_dir, "superpmi.py"),
+        "asmdiffs",
+        "--no_progress",
+        "-core_root", core_root_dir,
+        "-target_os", platform_name,
+        "-target_arch", arch_name,
+        "-arch", host_arch_name,
+        "-base_jit_path", base_jit_path,
+        "-diff_jit_path", diff_jit_path,
+        "-spmi_location", spmi_location,
+        "-error_limit", "100",
+        "-log_level", "debug",
+        "-log_file", log_file])
+
+    # If there are asm diffs, and jit-analyze ran, we'll get a diff_summary.md file in the spmi_location directory.
+    # We make sure the file doesn't exist before we run diffs, so we don't need to worry about superpmi.py creating
+    # a unique, numbered file. If there are no diffs, we still want to create this file and indicate there were no diffs.
+
+    overall_md_summary_file_target = os.path.join(log_directory, "superpmi_diff_summary_{}_{}.md".format(platform_name, arch_name))
+    if os.path.isfile(overall_md_summary_file):
+        try:
+            print("Copying summary file {} -> {}".format(overall_md_summary_file, overall_md_summary_file_target))
+            shutil.copy2(overall_md_summary_file, overall_md_summary_file_target)
+        except PermissionError as pe_error:
+            print('Ignoring PermissionError: {0}'.format(pe_error))
+    else:
+        # Write a basic summary file. Ideally, we should not generate a summary.md file. However, currently I'm seeing
+        # errors where the Helix work item fails to upload this specified file if it doesn't exist. We should change the
+        # upload to be conditional, or otherwise not error.
+        with open(overall_md_summary_file_target, "a") as f:
+            f.write("""\
+No diffs found
+""")
+
+    # TODO: the superpmi.py asmdiffs command returns a failure code if there are MISSING data even if there are
+    # no asm diffs. We should probably only fail if there are actual failures (not MISSING or asm diffs).
+
+    if return_code != 0:
+        print("Failure in {}".format(log_file))
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    sys.exit(main(args))
diff --git a/src/coreclr/scripts/superpmi_asmdiffs_setup.py b/src/coreclr/scripts/superpmi_asmdiffs_setup.py
new file mode 100644 (file)
index 0000000..6602708
--- /dev/null
@@ -0,0 +1,255 @@
+#!/usr/bin/env python3
+#
+# Licensed to the .NET Foundation under one or more agreements.
+# The .NET Foundation licenses this file to you under the MIT license.
+#
+# Title               : superpmi_asmdiffs_setup.py
+#
+# Notes:
+#
+# Script to setup the directory structure required to perform SuperPMI asmdiffs in CI.
+# It creates `correlation_payload_directory` with `base` and `diff` directories
+# that contain clrjit*.dll. It figures out the baseline commit hash to use for
+# a particular GitHub pull request, and downloads the JIT rolling build for that
+# commit hash. It downloads the jitutils repo and builds the jit-analyze tool. It
+# downloads a version of `git` to be used by jit-analyze.
+#
+################################################################################
+################################################################################
+
+import argparse
+import logging
+import os
+
+from coreclr_arguments import *
+from jitutil import copy_directory, set_pipeline_variable, run_command, TempDir, download_files
+
+parser = argparse.ArgumentParser(description="description")
+
+parser.add_argument("-arch", help="Architecture")
+parser.add_argument("-source_directory", help="Path to the root directory of the dotnet/runtime source tree")
+parser.add_argument("-product_directory", help="Path to the directory containing built binaries (e.g., <source_directory>/artifacts/bin/coreclr/windows.x64.Checked)")
+
+is_windows = platform.system() == "Windows"
+
+
+def setup_args(args):
+    """ Setup the args for SuperPMI to use.
+
+    Args:
+        args (ArgParse): args parsed by arg parser
+
+    Returns:
+        args (CoreclrArguments)
+
+    """
+    coreclr_args = CoreclrArguments(args, require_built_core_root=False, require_built_product_dir=False,
+                                    require_built_test_dir=False, default_build_type="Checked")
+
+    coreclr_args.verify(args,
+                        "arch",
+                        lambda unused: True,
+                        "Unable to set arch")
+
+    coreclr_args.verify(args,
+                        "source_directory",
+                        lambda source_directory: os.path.isdir(source_directory),
+                        "source_directory doesn't exist")
+
+    coreclr_args.verify(args,
+                        "product_directory",
+                        lambda product_directory: os.path.isdir(product_directory),
+                        "product_directory doesn't exist")
+
+    return coreclr_args
+
+
+def match_jit_files(full_path):
+    """ Match all the JIT files that we want to copy and use.
+        Note that we currently only match Windows files, and not osx cross-compile files.
+        We also don't copy the "default" clrjit.dll, since we always use the fully specified
+        JITs, e.g., clrjit_win_x86_x86.dll.
+    """
+    file_name = os.path.basename(full_path)
+
+    if file_name.startswith("clrjit_") and file_name.endswith(".dll") and file_name.find("osx") == -1:
+        return True
+
+    return False
+
+
+def match_superpmi_tool_files(full_path):
+    """ Match all the SuperPMI tool files that we want to copy and use.
+        Note that we currently only match Windows files.
+    """
+    file_name = os.path.basename(full_path)
+
+    if file_name == "superpmi.exe" or file_name == "mcs.exe":
+        return True
+
+    return False
+
+
+def main(main_args):
+    """ Prepare the Helix data for SuperPMI asmdiffs Azure DevOps pipeline.
+
+    The Helix correlation payload directory is created and populated as follows:
+
+    <source_directory>\payload -- the correlation payload directory
+        -- contains the *.py scripts from <source_directory>\src\coreclr\scripts
+        -- contains superpmi.exe, mcs.exe from the target-specific build
+    <source_directory>\payload\base
+        -- contains the baseline JITs
+    <source_directory>\payload\diff
+        -- contains the diff JITs
+    <source_directory>\payload\jit-analyze
+        -- contains the self-contained jit-analyze build (from dotnet/jitutils)
+    <source_directory>\payload\git
+        -- contains a Portable ("xcopy installable") `git` tool, downloaded from:
+        https://netcorenativeassets.blob.core.windows.net/resource-packages/external/windows/git/Git-2.32.0-64-bit.zip
+        This is needed by jit-analyze to do `git diff` on the generated asm. The `<source_directory>\payload\git\cmd`
+        directory is added to the PATH.
+        NOTE: this only runs on Windows.
+
+    Then, AzDO pipeline variables are set.
+
+    Args:
+        main_args ([type]): Arguments to the script
+    """
+
+    # Set up logging.
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    stream_handler = logging.StreamHandler(sys.stdout)
+    stream_handler.setLevel(logging.INFO)
+    logger.addHandler(stream_handler)
+
+    coreclr_args = setup_args(main_args)
+
+    arch = coreclr_args.arch
+    source_directory = coreclr_args.source_directory
+    product_directory = coreclr_args.product_directory
+
+    python_path = sys.executable
+
+    # CorrelationPayload directories
+    correlation_payload_directory = os.path.join(source_directory, "payload")
+    superpmi_scripts_directory = os.path.join(source_directory, 'src', 'coreclr', 'scripts')
+    base_jit_directory = os.path.join(correlation_payload_directory, "base")
+    diff_jit_directory = os.path.join(correlation_payload_directory, "diff")
+    jit_analyze_build_directory = os.path.join(correlation_payload_directory, "jit-analyze")
+    git_directory = os.path.join(correlation_payload_directory, "git")
+
+    ######## Get the portable `git` package
+
+    git_url = "https://netcorenativeassets.blob.core.windows.net/resource-packages/external/windows/git/Git-2.32.0-64-bit.zip"
+
+    print('Downloading {} -> {}'.format(git_url, git_directory))
+
+    urls = [ git_url ]
+    # There are too many files to be verbose in the download and copy.
+    download_files(urls, git_directory, verbose=False, display_progress=False)
+    git_exe_tool = os.path.join(git_directory, "cmd", "git.exe")
+    if not os.path.isfile(git_exe_tool):
+        print('Error: `git` not found at {}'.format(git_exe_tool))
+
+    ######## Get SuperPMI python scripts
+
+    # Copy *.py to CorrelationPayload
+    print('Copying {} -> {}'.format(superpmi_scripts_directory, correlation_payload_directory))
+    copy_directory(superpmi_scripts_directory, correlation_payload_directory, verbose_copy=True,
+                   match_func=lambda path: any(path.endswith(extension) for extension in [".py"]))
+
+    ######## Get baseline JIT
+
+    # Figure out which baseline JIT to use, and download it.
+    if not os.path.exists(base_jit_directory):
+        os.makedirs(base_jit_directory)
+
+    print("Fetching history of `main` branch so we can find the baseline JIT")
+    run_command(["git", "fetch", "origin", "main"], source_directory, _exit_on_fail=True)
+
+    # Note: we only support downloading Windows versions of the JIT currently. To support downloading
+    # non-Windows JITs on a Windows machine, pass `-host_os <os>` to jitrollingbuild.py.
+    print("Running jitrollingbuild.py download to get baseline")
+    _, _, return_code = run_command([
+        python_path,
+        os.path.join(superpmi_scripts_directory, "jitrollingbuild.py"),
+        "download",
+        "-arch", arch,
+        "-target_dir", base_jit_directory],
+        source_directory)
+
+    ######## Get diff JIT
+
+    print('Copying diff binaries {} -> {}'.format(product_directory, diff_jit_directory))
+    copy_directory(product_directory, diff_jit_directory, verbose_copy=True, match_func=match_jit_files)
+
+    ######## Get SuperPMI tools
+
+    # Put the SuperPMI tools directly in the root of the correlation payload directory.
+    print('Copying SuperPMI tools {} -> {}'.format(product_directory, correlation_payload_directory))
+    copy_directory(product_directory, correlation_payload_directory, verbose_copy=True, match_func=match_superpmi_tool_files)
+
+    ######## Clone and build jitutils: we only need jit-analyze
+
+    try:
+        with TempDir() as jitutils_directory:
+            run_command(
+                ["git", "clone", "--quiet", "--depth", "1", "https://github.com/dotnet/jitutils", jitutils_directory])
+
+            # Make sure ".dotnet" directory exists, by running the script at least once
+            dotnet_script_name = "dotnet.cmd" if is_windows else "dotnet.sh"
+            dotnet_script_path = os.path.join(source_directory, dotnet_script_name)
+            run_command([dotnet_script_path, "--info"], jitutils_directory)
+
+            # Build jit-analyze only, and build it as a self-contained app (not framework-dependent).
+            # What target RID are we building? It depends on where we're going to run this code.
+            # The RID catalog is here: https://docs.microsoft.com/en-us/dotnet/core/rid-catalog.
+            #   Windows x64 => win-x64
+            #   Windows x86 => win-x86
+            #   Windows arm32 => win-arm
+            #   Windows arm64 => win-arm64
+            #   Linux x64 => linux-x64
+            #   Linux arm32 => linux-arm
+            #   Linux arm64 => linux-arm64
+            #   macOS x64 => osx-x64
+
+            # NOTE: we currently only support running on Windows x86/x64 (we don't pass the target OS)
+            RID = None
+            if arch == "x86":
+                RID = "win-x86"
+            if arch == "x64":
+                RID = "win-x64"
+
+            # Set dotnet path to run build
+            os.environ["PATH"] = os.path.join(source_directory, ".dotnet") + os.pathsep + os.environ["PATH"]
+
+            run_command([
+                "dotnet",
+                "publish",
+                "-c", "Release",
+                "--runtime", RID,
+                "--self-contained",
+                "--output", jit_analyze_build_directory,
+                os.path.join(jitutils_directory, "src", "jit-analyze", "jit-analyze.csproj")],
+                jitutils_directory)
+    except PermissionError as pe_error:
+        # Details: https://bugs.python.org/issue26660
+        print('Ignoring PermissionError: {0}'.format(pe_error))
+
+    ######## Set pipeline variables
+
+    helix_source_prefix = "official"
+    creator = ""
+
+    print('Setting pipeline variables:')
+    set_pipeline_variable("CorrelationPayloadDirectory", correlation_payload_directory)
+    set_pipeline_variable("Architecture", arch)
+    set_pipeline_variable("Creator", creator)
+    set_pipeline_variable("HelixSourcePrefix", helix_source_prefix)
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    sys.exit(main(args))
diff --git a/src/coreclr/scripts/superpmi_asmdiffs_summarize.py b/src/coreclr/scripts/superpmi_asmdiffs_summarize.py
new file mode 100644 (file)
index 0000000..9601d91
--- /dev/null
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+#
+## Licensed to the .NET Foundation under one or more agreements.
+## The .NET Foundation licenses this file to you under the MIT license.
+#
+##
+# Title: superpmi_asmdiffs_summarize.py
+#
+# Notes:
+#
+# Script to consolidate the asm diffs summary .md files from all partitions into a single file and print it to the console.
+#
+################################################################################
+################################################################################
+
+import argparse
+import os
+import re
+from coreclr_arguments import *
+
+parser = argparse.ArgumentParser(description="Summarize the SuperPMI asmdiffs results from all partitions")
+
+parser.add_argument("-diff_summary_dir", help="Path to diff summary directory")
+parser.add_argument("-arch", help="Architecture")
+
+def setup_args(args):
+    """ Setup the args.
+
+    Args:
+        args (ArgParse): args parsed by arg parser
+
+    Returns:
+        args (CoreclrArguments)
+
+    """
+    coreclr_args = CoreclrArguments(args, require_built_core_root=False, require_built_product_dir=False,
+                                    require_built_test_dir=False, default_build_type="Checked")
+
+    coreclr_args.verify(args,
+                        "diff_summary_dir",
+                        lambda diff_summary_dir: os.path.isdir(diff_summary_dir),
+                        "diff_summary_dir doesn't exist")
+
+    coreclr_args.verify(args,
+                        "arch",
+                        lambda unused: True,
+                        "Unable to set arch")
+
+    return coreclr_args
+
+
+def append_diff_file(f, arch, file_name, full_file_path):
+    """ Append a single summary file to the consolidated diff file.
+
+    Args:
+        f : File we are appending to
+        arch (string): architecture we ran on
+        file_name (string): base file name of file to append (not including path components)
+        full_file_path (string): full path to file to append
+
+    Returns:
+        True if diffs were found in the file, False otherwise
+    """
+
+    diffs_found = False
+    print("Appending {}".format(full_file_path))
+
+    # What platform is this file summarizing? We parse the filename itself, which is of the form:
+    #   superpmi_diff_summary_<platform>_<arch>.md
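+    # For example, "superpmi_diff_summary_linux_arm64.md" yields diff_os "linux" and diff_arch "arm64".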
+
+    diff_os = "unknown"
+    diff_arch = "unknown"
+    match_obj = re.search(r'^superpmi_diff_summary_(.*)_(.*)\.md', file_name)
+    if match_obj is not None:
+        diff_os = match_obj.group(1)
+        diff_arch = match_obj.group(2)
+
+    with open(full_file_path, "r") as current_superpmi_md:
+        contents = current_superpmi_md.read()
+
+        # Were there actually any diffs? We currently check whether the file contains the text "No diffs found",
+        # which `superpmi_asmdiffs.py` writes when there are no diffs, rather than relying on the summary .md file
+        # simply being absent. (A missing file has the same effect.)
+        match_obj = re.search(r'^No diffs found', contents)
+        if match_obj is not None:
+            # There were no diffs in this file; don't add it to the result
+            pass
+        else:
+            diffs_found = True
+            # Write a header for this summary, and wrap the file contents in a
+            # <details> ... </details> disclosure section (with a <summary> title).
+            f.write("""\
+
+## {0} {1}
+
+<details>
+
+<summary>{0} {1} details</summary>
+
+Summary file: `{2}`
+
+To reproduce these diffs on Windows {3}:
+```
+superpmi.py asmdiffs -target_os {0} -target_arch {1} -arch {3}
+```
+
+""".format(diff_os, diff_arch, file_name, arch))
+
+            # Now write the contents
+            f.write(contents)
+
+            # Write the footer (close the <details> section)
+            f.write("""\
+
+</details>
+
+""")
+
+    return diffs_found
+
+
+def main(main_args):
+    """Main entrypoint
+
+    Args:
+        main_args (argparse.Namespace): Arguments to the script
+    """
+
+    coreclr_args = setup_args(main_args)
+
+    diff_summary_dir = coreclr_args.diff_summary_dir
+    arch = coreclr_args.arch
+
+    # Consolidate all superpmi_diff_summary_*.md in overall_diff_summary_<os>_<architecture>.md
+    # (Don't name it "superpmi_xxx.md" or we might consolidate it into itself.)
+    # If there are no summary files found, add a "No diffs found" text to be explicit about that.
+    #
+    # Note that we currently do this summarizing in an architecture-specific job. That means that diffs run
+    # in a Windows x64 job and those run in a Windows x86 job will be summarized in two separate files.
+    # We should create a job that depends on all the diff jobs, downloads all the .md file artifacts,
+    # and consolidates everything together in one file.
+
+    any_diffs_found = False
+
+    final_md_path = os.path.join(diff_summary_dir, "overall_diff_summary_windows_{}.md".format(arch))
+    print("Consolidating final {}".format(final_md_path))
+    with open(final_md_path, "a") as f:
+
+        f.write("""\
+# ASM diffs generated on Windows {}
+""".format(arch))
+
+        for dirpath, _, files in os.walk(diff_summary_dir):
+            for file_name in files:
+                if file_name.startswith("superpmi_") and file_name.endswith(".md"):
+                    full_file_path = os.path.join(dirpath, file_name)
+                    if append_diff_file(f, arch, file_name, full_file_path):
+                        any_diffs_found = True
+
+        if not any_diffs_found:
+            f.write("""\
+
+No diffs found
+""")
+
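+    # The AzDO "task.uploadsummary" logging command attaches the consolidated .md file to the build summary.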
+    print("##vso[task.uploadsummary]{}".format(final_md_path))
+
+    with open(final_md_path, "r") as f:
+        print(f.read())
+
+    return 0
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    sys.exit(main(args))
index 2a0225d..f2e01ad 100644 (file)
@@ -20,7 +20,7 @@ import zipfile
 from os import path
 from coreclr_arguments import *
 from superpmi import TempDir, determine_mcs_tool_path, determine_superpmi_tool_path, is_nonzero_length_file
-from azdo_pipelines_util import run_command
+from jitutil import run_command
 
 # Start of parser object creation.
 is_windows = platform.system() == "Windows"
index 28ebf50..1c15b9a 100644 (file)
@@ -20,7 +20,7 @@ from os import path
 from os.path import isfile
 from shutil import copyfile
 from coreclr_arguments import *
-from azdo_pipelines_util import run_command, ChangeDir, TempDir
+from jitutil import run_command, ChangeDir, TempDir
 
 # Start of parser object creation.
 is_windows = platform.system() == "Windows"
index a6a9b84..ce102ea 100644 (file)
@@ -37,7 +37,7 @@ import os
 import stat
 
 from coreclr_arguments import *
-from azdo_pipelines_util import run_command, copy_directory, copy_files, set_pipeline_variable, ChangeDir, TempDir
+from jitutil import run_command, copy_directory, copy_files, set_pipeline_variable, ChangeDir, TempDir
 
 
 # Start of parser object creation.
@@ -398,7 +398,7 @@ def main(main_args):
 
     # create superpmi directory
     print('Copying {} -> {}'.format(superpmi_src_directory, superpmi_dst_directory))
-    copy_directory(superpmi_src_directory, superpmi_dst_directory, match_func=lambda path: any(path.endswith(extension) for extension in [".py"]))
+    copy_directory(superpmi_src_directory, superpmi_dst_directory, verbose_output=True, match_func=lambda path: any(path.endswith(extension) for extension in [".py"]))
 
     if is_windows:
         acceptable_copy = lambda path: any(path.endswith(extension) for extension in [".py", ".dll", ".exe", ".json"])
@@ -407,7 +407,7 @@ def main(main_args):
         acceptable_copy = lambda path: (os.path.basename(path).find(".") == -1) or any(path.endswith(extension) for extension in [".py", ".dll", ".so", ".json"])
 
     print('Copying {} -> {}'.format(coreclr_args.core_root_directory, superpmi_dst_directory))
-    copy_directory(coreclr_args.core_root_directory, superpmi_dst_directory, match_func=acceptable_copy)
+    copy_directory(coreclr_args.core_root_directory, superpmi_dst_directory, verbose_output=True, match_func=acceptable_copy)
 
     # Copy all the test files to CORE_ROOT
     # The reason is there are lot of dependencies with *.Tests.dll and to ensure we do not get
@@ -448,7 +448,7 @@ def main(main_args):
             run_command(["ls", "-l", folder_name])
 
         make_readable(coreclr_args.input_directory)
-        copy_directory(coreclr_args.input_directory, superpmi_dst_directory, match_func=acceptable_copy)
+        copy_directory(coreclr_args.input_directory, superpmi_dst_directory, verbose_output=True, match_func=acceptable_copy)
 
     # Workitem directories
     workitem_directory = os.path.join(source_directory, "workitem")
index a00b807..198b0f2 100644 (file)
@@ -15,7 +15,7 @@
 import argparse
 import os
 from coreclr_arguments import *
-from azdo_pipelines_util import run_command
+from jitutil import run_command
 
 parser = argparse.ArgumentParser(description="description")
 
index 73c89eb..b7717da 100644 (file)
@@ -17,7 +17,7 @@ import argparse
 import os
 
 from coreclr_arguments import *
-from azdo_pipelines_util import copy_directory, copy_files, set_pipeline_variable
+from jitutil import copy_directory, copy_files, set_pipeline_variable
 
 parser = argparse.ArgumentParser(description="description")
 
@@ -90,12 +90,12 @@ def main(main_args):
 
     # Copy *.py to CorrelationPayload
     print('Copying {} -> {}'.format(superpmi_src_directory, correlation_payload_directory))
-    copy_directory(superpmi_src_directory, correlation_payload_directory,
+    copy_directory(superpmi_src_directory, correlation_payload_directory, verbose_output=True,
                    match_func=lambda path: any(path.endswith(extension) for extension in [".py"]))
 
     # Copy clrjit*_arch.dll binaries to CorrelationPayload
     print('Copying binaries {} -> {}'.format(product_directory, correlation_payload_directory))
-    copy_directory(product_directory, correlation_payload_directory, match_func=match_correlation_files)
+    copy_directory(product_directory, correlation_payload_directory, verbose_output=True, match_func=match_correlation_files)
 
     # Set variables
     print('Setting pipeline variables:')