From: Edward Yang <ezyang@fb.com>
Date: Wed, 5 Dec 2018 17:21:13 +0000 (-0800)
Subject: Unify build_caffe2_amd.py and build_pytorch_amd.py (#14769)
X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~2456
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=62f4db6d8a5c72621e8a88c9f0144d98225dea7c;p=platform%2Fupstream%2Fpytorch.git

Unify build_caffe2_amd.py and build_pytorch_amd.py (#14769)

Summary:
I need to preserve the ability to HIPify only the out-of-place files,
so build_amd.py grows an --out-of-place-only flag.

Stacked on #14757
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14769

Differential Revision: D13340154

Pulled By: ezyang

fbshipit-source-id: 1b855bc79e824ea94517a893236fd2c8ba4cb79d
---

diff --git a/.jenkins/caffe2/build.sh b/.jenkins/caffe2/build.sh
index d9d1fd7..bf2ea87 100755
--- a/.jenkins/caffe2/build.sh
+++ b/.jenkins/caffe2/build.sh
@@ -148,8 +148,7 @@ if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
   CMAKE_ARGS+=("-USE_LMDB=ON")
 
   ########## HIPIFY Caffe2 operators
-  ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_pytorch_amd.py"
-  ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_caffe2_amd.py"
+  ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_amd.py"
 fi
 
 # building bundled nccl in this config triggers a bug in nvlink. For
diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh
index 73d8b4d..5dd3e53 100755
--- a/.jenkins/pytorch/build.sh
+++ b/.jenkins/pytorch/build.sh
@@ -79,8 +79,7 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
     export PATH="$CACHE_WRAPPER_DIR:$PATH"
   fi
 
-  python tools/amd_build/build_pytorch_amd.py
-  python tools/amd_build/build_caffe2_amd.py
+  python tools/amd_build/build_amd.py
   # OPENCV is needed to enable ImageInput operator in caffe2 resnet5_trainer
   # LMDB is needed to read datasets from https://download.caffe2.ai/databases/resnet_trainer.zip
   USE_ROCM=1 USE_LMDB=1 USE_OPENCV=1 python setup.py install --user
diff --git a/c10/hip/CMakeLists.txt b/c10/hip/CMakeLists.txt
index a89dce9..d1b0af9 100644
--- a/c10/hip/CMakeLists.txt
+++ b/c10/hip/CMakeLists.txt
@@ -28,8 +28,8 @@ set_source_files_properties(${__c10_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY
 file(GLOB_RECURSE C10_HIP_HEADERS *.h)
 hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS})
 
-# Propagate HIP_HCC_FLAGS that were set from Dependencies.cmake
-target_compile_options(c10_hip PRIVATE ${HIP_HCC_FLAGS})
+# Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
+target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})
 
 # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
 # minimal.  I'm not sure if we need hip_hcc or not; for now leave it out
diff --git a/tools/README.md b/tools/README.md
index b693918..4b22c26 100644
--- a/tools/README.md
+++ b/tools/README.md
@@ -54,10 +54,8 @@ Important if you want to run on AMD GPU:
   into AMD HIP.  Right now, PyTorch and Caffe2 share logic for how to
   do this transpilation, but have separate entry-points for transpiling
   either PyTorch or Caffe2 code.
-  * [build_caffe2_amd.py](amd_build/build_caffe2_amd.py) - Script
-    for HIPifying the Caffe2 codebase.
-  * [build_pytorch_amd.py](amd_build/build_pytorch_amd.py) - Script
-    for HIPifying the PyTorch codebase.
+  * [build_amd.py](amd_build/build_amd.py) - Top-level entry
+    point for HIPifying our codebase.
 
 Tools which are only situationally useful:
 
diff --git a/tools/amd_build/build_amd.py b/tools/amd_build/build_amd.py
new file mode 100644
index 0000000..d109e6a
--- /dev/null
+++ b/tools/amd_build/build_amd.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+"""Top-level entry point for HIPifying the PyTorch and Caffe2 sources.
+
+By default this applies the patches in ``patches/``, rewrites CUDA macros
+under ``torch/`` in place, and then runs pyHIPIFY over the project.  Pass
+``--out-of-place-only`` to skip the in-place patching and rewriting steps.
+"""
+
+from __future__ import absolute_import, division, print_function
+import os
+import sys
+import subprocess
+import argparse
+from functools import reduce
+
+from pyHIPIFY import hipify_python
+
+parser = argparse.ArgumentParser(description='Top-level script for HIPifying, filling in most common parameters')
+parser.add_argument(
+    '--out-of-place-only',
+    action='store_true',
+    help="Whether to only run hipify out-of-place on source files")
+args = parser.parse_args()
+
+amd_build_dir = os.path.dirname(os.path.realpath(__file__))
+proj_dir = os.path.dirname(os.path.dirname(amd_build_dir))
+
+includes = [
+    "caffe2/operators/*",
+    "caffe2/sgd/*",
+    "caffe2/image/*",
+    "caffe2/transforms/*",
+    "caffe2/video/*",
+    "caffe2/distributed/*",
+    "caffe2/queue/*",
+    "binaries/*",
+    "caffe2/**/*_test*",
+    "caffe2/core/*",
+    "caffe2/db/*",
+    "caffe2/utils/*",
+    "c10/cuda/*",
+    # PyTorch paths
+    # Keep this synchronized with is_pytorch_file in hipify_python.py
+    "aten/*",
+    "torch/*",
+]
+
+ignores = [
+    "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
+    "caffe2/operators/pool_op_cudnn.cu",
+    '**/hip/**',
+    "aten/src/ATen/core/*",
+]
+
+json_file = ""  # Yeah, don't ask me why the default is ""...
+if not args.out_of_place_only:
+    # List of operators currently disabled (PyTorch only)
+    json_file = os.path.join(amd_build_dir, "disabled_features.json")
+
+    # Apply patch files in place (PyTorch only).  Run each "git apply" to
+    # completion, in sorted order: launching them asynchronously would race
+    # with the in-place rewrites and the hipify pass below.
+    patch_folder = os.path.join(amd_build_dir, "patches")
+    for filename in sorted(os.listdir(patch_folder)):
+        patch = os.path.join(patch_folder, filename)
+        if subprocess.call(["git", "apply", patch], cwd=proj_dir) != 0:
+            # Best effort: report the failure but keep going.
+            print("WARNING: failed to apply patch " + patch, file=sys.stderr)
+
+    # Make various replacements inside AMD_BUILD/torch directory
+    ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp",
+                    "csrc/cuda/cuda_check.h"]
+    for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")):
+        for filename in files:
+            if filename.endswith(".cpp") or filename.endswith(".h"):
+                source = os.path.join(root, filename)
+                # Disabled files
+                if reduce(lambda result, exclude: source.endswith(exclude) or result, ignore_files, False):
+                    continue
+                # Update contents.
+                with open(source, "r+") as f:
+                    contents = f.read()
+                    contents = contents.replace("USE_CUDA", "USE_ROCM")
+                    contents = contents.replace("CUDA_VERSION", "0")
+                    f.seek(0)
+                    f.write(contents)
+                    f.truncate()
+                    f.flush()
+                    os.fsync(f)
+
+hipify_python.hipify(
+    project_directory=proj_dir,
+    output_directory=proj_dir,
+    includes=includes,
+    ignores=ignores,
+    out_of_place_only=args.out_of_place_only,
+    json_settings=json_file,
+    add_static_casts_option=True)
diff --git a/tools/amd_build/build_caffe2_amd.py b/tools/amd_build/build_caffe2_amd.py
deleted file mode 100755
index d350400..0000000
--- a/tools/amd_build/build_caffe2_amd.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env python
-
-from __future__ import absolute_import, division, print_function
-import os
-import sys
-
-from pyHIPIFY import hipify_python
-
-amd_build_dir = os.path.dirname(os.path.realpath(__file__))
-proj_dir = os.path.join(os.path.dirname(os.path.dirname(amd_build_dir)))
-
-includes = [
-    "caffe2/operators/*",
-    "caffe2/sgd/*",
-    "caffe2/image/*",
-    "caffe2/transforms/*",
-    "caffe2/video/*",
-    "caffe2/distributed/*",
-    "caffe2/queue/*",
-    "binaries/*",
-    "caffe2/**/*_test*",
-    "caffe2/core/*",
-    "caffe2/db/*",
-    "caffe2/utils/*",
-    "c10/cuda/*",
-]
-
-ignores = [
-    "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
-    "caffe2/operators/pool_op_cudnn.cu",
-    '**/hip/**',
-]
-
-file_extensions = ['.cc', '.cu', '.h', '.cuh', '.in']
-
-hipify_python.hipify(
-    project_directory=proj_dir,
-    output_directory=proj_dir,
-    includes=includes,
-    extensions=file_extensions,
-    ignores=ignores,
-    hipify_caffe2=True,
-    add_static_casts_option=True)
diff --git a/tools/amd_build/build_pytorch_amd.py b/tools/amd_build/build_pytorch_amd.py
deleted file mode 100644
index 2a08a7e..0000000
--- a/tools/amd_build/build_pytorch_amd.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from __future__ import absolute_import, division, print_function
-
-import os
-import subprocess
-import sys
-from functools import reduce
-
-from pyHIPIFY import hipify_python
-
-amd_build_dir = os.path.dirname(os.path.realpath(__file__))
-proj_dir = os.path.dirname(os.path.dirname(amd_build_dir))
-
-# Keep this synchronized with is_pytorch_file in hipify_python.py
-includes = [
-    "aten/*",
-    "torch/*",
-]
-
-ignores = [
-    "aten/src/ATen/core/*",
-]
-
-# List of operators currently disabled
-json_file = os.path.join(amd_build_dir, "disabled_features.json")
-
-# Apply patch files in place.
-patch_folder = os.path.join(amd_build_dir, "patches")
-for filename in os.listdir(os.path.join(amd_build_dir, "patches")):
-    subprocess.Popen(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir)
-
-# Make various replacements inside AMD_BUILD/torch directory
-ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp",
-                "csrc/cuda/cuda_check.h"]
-for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")):
-    for filename in files:
-        if filename.endswith(".cpp") or filename.endswith(".h"):
-            source = os.path.join(root, filename)
-            # Disabled files
-            if reduce(lambda result, exclude: source.endswith(exclude) or result, ignore_files, False):
-                continue
-            # Update contents.
-            with open(source, "r+") as f:
-                contents = f.read()
-                contents = contents.replace("USE_CUDA", "USE_ROCM")
-                contents = contents.replace("CUDA_VERSION", "0")
-                f.seek(0)
-                f.write(contents)
-                f.truncate()
-                f.flush()
-                os.fsync(f)
-
-hipify_python.hipify(
-    project_directory=proj_dir,
-    output_directory=proj_dir,
-    includes=includes,
-    ignores=ignores,
-    json_settings=json_file,
-    add_static_casts_option=True,
-    show_progress=False)
diff --git a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py
index 730b53b..817d13b 100644
--- a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py
+++ b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py
@@ -2270,6 +2270,8 @@ C10_MAPPINGS = collections.OrderedDict([
     ("c10/cuda/CUDAMacros.h", ("c10/hip/HIPMacros.h", API_C10)),
     ("c10/cuda/CUDAMathCompat.h", ("c10/hip/HIPMathCompat.h", API_C10)),
     ("c10/cuda/CUDAFunctions.h", ("c10/hip/HIPFunctions.h", API_C10)),
+    ("c10/cuda/impl/CUDATest.h", ("c10/hip/impl/HIPTest.h", API_C10)),
+    ("c10/cuda/impl/cuda_cmake_macros.h", ("c10/hip/impl/hip_cmake_macros.h", API_C10)),
     ("C10_CUDA_CHECK", ("C10_HIP_CHECK", API_C10)),
     ("c10::cuda", ("c10::hip", API_C10)),
     # This substitution is not permissible, because there's another copy of this
diff --git a/tools/amd_build/pyHIPIFY/hipify_python.py b/tools/amd_build/pyHIPIFY/hipify_python.py
index 49ec084..09c0566 100755
--- a/tools/amd_build/pyHIPIFY/hipify_python.py
+++ b/tools/amd_build/pyHIPIFY/hipify_python.py
@@ -205,7 +205,7 @@ class disablefuncmode(Enum):
     EMPTYBODY = 6
 
 
-def matched_files_iter(root_path, includes=('*',), ignores=(), extensions=(), hipify_caffe2=False):
+def matched_files_iter(root_path, includes=('*',), ignores=(), extensions=(), out_of_place_only=False):
     def _fnmatch(filepath, patterns):
         return any(fnmatch.fnmatch(filepath, pattern) for pattern in patterns)
 
@@ -231,9 +231,10 @@ def matched_files_iter(root_path, includes=('*',), ignores=(), extensions=(), hi
         for filename in filenames:
             filepath = os.path.join(rel_dirpath, filename)
             if _fnmatch(filepath, includes) and (not _fnmatch(filepath, ignores)) and match_extensions(filepath):
-                if hipify_caffe2 and not is_caffe2_gpu_file(filepath):
+                if not is_pytorch_file(filepath) and not is_caffe2_gpu_file(filepath):
+                    continue
+                if out_of_place_only and not is_out_of_place(filepath):
                     continue
-
                 yield filepath
 
 
@@ -766,7 +767,11 @@ def get_hip_file_path(filepath):
     return os.path.join(dirpath, root + ext)
 
 
-# Keep this synchronized with includes/ignores in build_pytorch_amd.py
+def is_out_of_place(filepath):
+    return not is_pytorch_file(filepath)
+
+
+# Keep this synchronized with includes/ignores in build_amd.py
 def is_pytorch_file(filepath):
     if filepath.startswith("aten/"):
         if filepath.startswith("aten/src/ATen/core/"):
@@ -870,7 +875,26 @@ def fix_static_global_kernels(in_txt):
     return in_txt
 
 
-def get_kernel_template_params(output_directory, the_file, KernelDictionary, template_param_to_value):
+# Note [PyTorch and Caffe2 kernel name clobber]
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# For some reason, the static_cast logic in pyHIPIFY assumes all kernels
+# have unique names.  This may be true internally within PyTorch and
+# Caffe2, but it is not true across PyTorch and Caffe2.  The metadata
+# in these cases clobbers each other.
+#
+# To prevent this happening, KernelTemplateParams is distinguished
+# by a boolean saying if it is a PyTorch kernel or a Caffe2 kernel.
+# We can't do a more fine-grained distinction, e.g., the filename,
+# because we need to work on the kernel from files distinct from
+# the one they were originally defined in (that's why this is done
+# in two passes).
+#
+# We can soon kill static_cast handling entirely, as hcc will support
+# this properly.  So don't bother refactoring this code; it will
+# get deleted soon.
+
+
+def get_kernel_template_params(output_directory, the_file, KernelTemplateParams, template_param_to_value):
     """Scan for __global__ kernel definitions then extract its argument types, and static cast as necessary"""
     # Read the kernel file.
     with openf(os.path.join(output_directory, the_file), "r") as f:
@@ -938,7 +962,8 @@ def get_kernel_template_params(output_directory, the_file, KernelDictionary, tem
             for idx, arg_type in enumerate(argument_types):
                 formatted_args[idx] = arg_type
 
-            KernelDictionary[kernel_name] = {"kernel_with_template": kernel_with_template, "arg_types": formatted_args}
+            # See Note [PyTorch and Caffe2 kernel name clobber]
+            KernelTemplateParams[(is_pytorch_file(the_file), kernel_name)] = {"kernel_with_template": kernel_with_template, "arg_types": formatted_args}
 
         # Extract generated kernels
         # curandStateMtgp32 *state, int size, T *result, ARG1
@@ -954,7 +979,8 @@ def get_kernel_template_params(output_directory, the_file, KernelDictionary, tem
                 kernel_args = {1: "int", 2: "{0} *".format(kernel_params[0]), 3: kernel_params[1], 4: kernel_params[2]}
 
             # Argument at position 1 should be int
-            KernelDictionary[kernel_name] = {"kernel_with_template": kernel_name, "arg_types": kernel_args}
+            # See Note [PyTorch and Caffe2 kernel name clobber]
+            KernelTemplateParams[(is_pytorch_file(the_file), kernel_name)] = {"kernel_with_template": kernel_name, "arg_types": kernel_args}
 
 
 def disable_unsupported_function_call(function, input_string, replacement):
@@ -1055,7 +1081,7 @@ def extract_arguments(start, string):
 
 
 # Add static_cast to ensure that the type of kernel arguments matches that in the corresponding kernel definition
-def add_static_casts(filepath, KernelTemplateParams):
+def add_static_casts(orig_filepath, filepath, KernelTemplateParams):
     """Add static casts to kernel launches in order to keep launch argument types and kernel definition types matching.
 
        Example:
@@ -1083,11 +1109,13 @@ def add_static_casts(filepath, KernelTemplateParams):
             original_kernel_name_with_template = argument_strings[0].strip()
             kernel_name = original_kernel_name_with_template.split("<")[0].strip()
             ignore = ["upscale"]
-            if kernel_name in KernelTemplateParams and kernel_name not in ignore:
+            if (is_pytorch_file(orig_filepath), kernel_name) in KernelTemplateParams and kernel_name not in ignore:
                 # Add template to the kernel
                 # Add static_casts to relevant arguments
-                kernel_name_with_template = KernelTemplateParams[kernel_name]["kernel_with_template"]
-                argument_types = KernelTemplateParams[kernel_name]["arg_types"]
+                # See Note [PyTorch and Caffe2 kernel name clobber]
+                params = KernelTemplateParams[(is_pytorch_file(orig_filepath), kernel_name)]
+                kernel_name_with_template = params["kernel_with_template"]
+                argument_types = params["arg_types"]
 
                 # The first 5 arguments are simply (function, number blocks, dimension blocks, shared memory, stream)
                 # old_kernel_launch_parameters - will contain the actual arguments to the function itself.
@@ -1210,11 +1238,11 @@ def main():
         required=False)
 
     parser.add_argument(
-        '--hipify_caffe2',
+        '--out-of-place-only',
         type=str2bool,
         default=False,
-        help="Whether to hipify caffe2 source",
-        required=False)
+        help="Whether to only run hipify out-of-place on source files",
+        required=False)
 
     parser.add_argument(
         '--ignores',
@@ -1254,7 +1282,7 @@ def main():
         includes=args.includes,
         json_settings=args.json_settings,
         add_static_casts_option=args.add_static_casts,
-        hipify_caffe2=args.hipify_caffe2,
+        out_of_place_only=args.out_of_place_only,
         ignores=args.ignores,
         show_progress=args.show_progress)
 
@@ -1262,12 +1290,12 @@ def main():
 def hipify(
     project_directory,
     show_detailed=False,
-    extensions=(".cu", ".cuh", ".c", ".cpp", ".h", ".in", ".hpp"),
+    extensions=(".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".in", ".hpp"),
     output_directory="",
     includes=(),
     json_settings="",
     add_static_casts_option=False,
-    hipify_caffe2=False,
+    out_of_place_only=False,
     ignores=(),
     show_progress=True,
 ):
@@ -1378,7 +1406,7 @@ def hipify(
 
     all_files = list(matched_files_iter(output_directory, includes=includes,
                                         ignores=ignores, extensions=extensions,
-                                        hipify_caffe2=hipify_caffe2))
+                                        out_of_place_only=out_of_place_only))
 
     # Start Preprocessor
     preprocess(
@@ -1400,6 +1428,7 @@ def hipify(
         # Execute the Clang Tool to Automatically add static casts
         for filepath in all_files:
             add_static_casts(
+                filepath,
                 os.path.join(
                     output_directory,
                     get_hip_file_path(filepath)),