From: Edward Yang Date: Wed, 5 Dec 2018 17:21:13 +0000 (-0800) Subject: Unify build_caffe2_amd.py and build_pytorch_amd.py (#14769) X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~2456 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=62f4db6d8a5c72621e8a88c9f0144d98225dea7c;p=platform%2Fupstream%2Fpytorch.git Unify build_caffe2_amd.py and build_pytorch_amd.py (#14769) Summary: I need to preserve ability to HIPify out-of-place files only, so build_amd.py grows a --out-of-place-only flag. Stacked on #14757 Pull Request resolved: https://github.com/pytorch/pytorch/pull/14769 Differential Revision: D13340154 Pulled By: ezyang fbshipit-source-id: 1b855bc79e824ea94517a893236fd2c8ba4cb79d --- diff --git a/.jenkins/caffe2/build.sh b/.jenkins/caffe2/build.sh index d9d1fd7..bf2ea87 100755 --- a/.jenkins/caffe2/build.sh +++ b/.jenkins/caffe2/build.sh @@ -148,8 +148,7 @@ if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then CMAKE_ARGS+=("-USE_LMDB=ON") ########## HIPIFY Caffe2 operators - ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_pytorch_amd.py" - ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_caffe2_amd.py" + ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_amd.py" fi # building bundled nccl in this config triggers a bug in nvlink. For diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh index 73d8b4d..5dd3e53 100755 --- a/.jenkins/pytorch/build.sh +++ b/.jenkins/pytorch/build.sh @@ -79,8 +79,7 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then export PATH="$CACHE_WRAPPER_DIR:$PATH" fi - python tools/amd_build/build_pytorch_amd.py - python tools/amd_build/build_caffe2_amd.py + python tools/amd_build/build_amd.py # OPENCV is needed to enable ImageInput operator in caffe2 resnet5_trainer # LMDB is needed to read datasets from https://download.caffe2.ai/databases/resnet_trainer.zip USE_ROCM=1 USE_LMDB=1 USE_OPENCV=1 python setup.py install --user diff --git a/c10/hip/CMakeLists.txt b/c10/hip/CMakeLists.txt index a89dce9..d1b0af9 100644 --- a/c10/hip/CMakeLists.txt +++ b/c10/hip/CMakeLists.txt @@ -28,8 +28,8 @@ set_source_files_properties(${__c10_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY file(GLOB_RECURSE C10_HIP_HEADERS *.h) hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS}) -# Propagate HIP_HCC_FLAGS that were set from Dependencies.cmake -target_compile_options(c10_hip PRIVATE ${HIP_HCC_FLAGS}) +# Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake +target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS}) # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be # minimal. I'm not sure if we need hip_hcc or not; for now leave it out diff --git a/tools/README.md b/tools/README.md index b693918..4b22c26 100644 --- a/tools/README.md +++ b/tools/README.md @@ -54,10 +54,8 @@ Important if you want to run on AMD GPU: into AMD HIP. Right now, PyTorch and Caffe2 share logic for how to do this transpilation, but have separate entry-points for transpiling either PyTorch or Caffe2 code. - * [build_caffe2_amd.py](amd_build/build_caffe2_amd.py) - Script - for HIPifying the Caffe2 codebase. - * [build_pytorch_amd.py](amd_build/build_pytorch_amd.py) - Script - for HIPifying the PyTorch codebase. + * [build_amd.py](amd_build/build_amd.py) - Top-level entry + point for HIPifying our codebase. Tools which are only situationally useful: diff --git a/tools/amd_build/build_amd.py b/tools/amd_build/build_amd.py new file mode 100644 index 0000000..d109e6a --- /dev/null +++ b/tools/amd_build/build_amd.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python + +from __future__ import absolute_import, division, print_function +import os +import sys +import subprocess +import argparse +from functools import reduce + +from pyHIPIFY import hipify_python + +parser = argparse.ArgumentParser(description='Top-level script for HIPifying, filling in most common parameters') +parser.add_argument( + '--out-of-place-only', + action='store_true', + help="Whether to only run hipify out-of-place on source files") +args = parser.parse_args() + +amd_build_dir = os.path.dirname(os.path.realpath(__file__)) +proj_dir = os.path.join(os.path.dirname(os.path.dirname(amd_build_dir))) + +includes = [ + "caffe2/operators/*", + "caffe2/sgd/*", + "caffe2/image/*", + "caffe2/transforms/*", + "caffe2/video/*", + "caffe2/distributed/*", + "caffe2/queue/*", + "binaries/*", + "caffe2/**/*_test*", + "caffe2/core/*", + "caffe2/db/*", + "caffe2/utils/*", + "c10/cuda/*", + # PyTorch paths + # Keep this synchronized with is_pytorch_file in hipify_python.py + "aten/*", + "torch/*", +] + +ignores = [ + "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu", + "caffe2/operators/pool_op_cudnn.cu", + '**/hip/**', + "aten/src/ATen/core/*", +] + +json_file = "" # Yeah, don't ask me why the default is ""... +if not args.out_of_place_only: + # List of operators currently disabled (PyTorch only) + json_file = os.path.join(amd_build_dir, "disabled_features.json") + + # Apply patch files in place (PyTorch only) + patch_folder = os.path.join(amd_build_dir, "patches") + for filename in os.listdir(os.path.join(amd_build_dir, "patches")): + subprocess.Popen(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir) + + # Make various replacements inside AMD_BUILD/torch directory + ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp", + "csrc/cuda/cuda_check.h"] + for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")): + for filename in files: + if filename.endswith(".cpp") or filename.endswith(".h"): + source = os.path.join(root, filename) + # Disabled files + if reduce(lambda result, exclude: source.endswith(exclude) or result, ignore_files, False): + continue + # Update contents. + with open(source, "r+") as f: + contents = f.read() + contents = contents.replace("USE_CUDA", "USE_ROCM") + contents = contents.replace("CUDA_VERSION", "0") + f.seek(0) + f.write(contents) + f.truncate() + f.flush() + os.fsync(f) + +hipify_python.hipify( + project_directory=proj_dir, + output_directory=proj_dir, + includes=includes, + ignores=ignores, + out_of_place_only=args.out_of_place_only, + json_settings=json_file, + add_static_casts_option=True) diff --git a/tools/amd_build/build_caffe2_amd.py b/tools/amd_build/build_caffe2_amd.py deleted file mode 100755 index d350400..0000000 --- a/tools/amd_build/build_caffe2_amd.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python - -from __future__ import absolute_import, division, print_function -import os -import sys - -from pyHIPIFY import hipify_python - -amd_build_dir = os.path.dirname(os.path.realpath(__file__)) -proj_dir = os.path.join(os.path.dirname(os.path.dirname(amd_build_dir))) - -includes = [ - "caffe2/operators/*", - "caffe2/sgd/*", - "caffe2/image/*", - "caffe2/transforms/*", - "caffe2/video/*", - "caffe2/distributed/*", - "caffe2/queue/*", - "binaries/*", - "caffe2/**/*_test*", - "caffe2/core/*", - "caffe2/db/*", - "caffe2/utils/*", - "c10/cuda/*", -] - -ignores = [ - "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu", - "caffe2/operators/pool_op_cudnn.cu", - '**/hip/**', -] - -file_extensions = ['.cc', '.cu', '.h', '.cuh', '.in'] - -hipify_python.hipify( - project_directory=proj_dir, - output_directory=proj_dir, - includes=includes, - extensions=file_extensions, - ignores=ignores, - hipify_caffe2=True, - add_static_casts_option=True) diff --git a/tools/amd_build/build_pytorch_amd.py b/tools/amd_build/build_pytorch_amd.py deleted file mode 100644 index 2a08a7e..0000000 --- a/tools/amd_build/build_pytorch_amd.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import absolute_import, division, print_function - -import os -import subprocess -import sys -from functools import reduce - -from pyHIPIFY import hipify_python - -amd_build_dir = os.path.dirname(os.path.realpath(__file__)) -proj_dir = os.path.dirname(os.path.dirname(amd_build_dir)) - -# Keep this synchronized with is_pytorch_file in hipify_python.py -includes = [ - "aten/*", - "torch/*", -] - -ignores = [ - "aten/src/ATen/core/*", -] - -# List of operators currently disabled -json_file = os.path.join(amd_build_dir, "disabled_features.json") - -# Apply patch files in place. -patch_folder = os.path.join(amd_build_dir, "patches") -for filename in os.listdir(os.path.join(amd_build_dir, "patches")): - subprocess.Popen(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir) - -# Make various replacements inside AMD_BUILD/torch directory -ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp", - "csrc/cuda/cuda_check.h"] -for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")): - for filename in files: - if filename.endswith(".cpp") or filename.endswith(".h"): - source = os.path.join(root, filename) - # Disabled files - if reduce(lambda result, exclude: source.endswith(exclude) or result, ignore_files, False): - continue - # Update contents. - with open(source, "r+") as f: - contents = f.read() - contents = contents.replace("USE_CUDA", "USE_ROCM") - contents = contents.replace("CUDA_VERSION", "0") - f.seek(0) - f.write(contents) - f.truncate() - f.flush() - os.fsync(f) - -hipify_python.hipify( - project_directory=proj_dir, - output_directory=proj_dir, - includes=includes, - ignores=ignores, - json_settings=json_file, - add_static_casts_option=True, - show_progress=False) diff --git a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py index 730b53b..817d13b 100644 --- a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py +++ b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py @@ -2270,6 +2270,8 @@ C10_MAPPINGS = collections.OrderedDict([ ("c10/cuda/CUDAMacros.h", ("c10/hip/HIPMacros.h", API_C10)), ("c10/cuda/CUDAMathCompat.h", ("c10/hip/HIPMathCompat.h", API_C10)), ("c10/cuda/CUDAFunctions.h", ("c10/hip/HIPFunctions.h", API_C10)), + ("c10/cuda/impl/CUDATest.h", ("c10/hip/impl/HIPTest.h", API_C10)), + ("c10/cuda/impl/cuda_cmake_macros.h", ("c10/hip/impl/hip_cmake_macros.h", API_C10)), ("C10_CUDA_CHECK", ("C10_HIP_CHECK", API_C10)), ("c10::cuda", ("c10::hip", API_C10)), # This substitution is not permissible, because there's another copy of this diff --git a/tools/amd_build/pyHIPIFY/hipify_python.py b/tools/amd_build/pyHIPIFY/hipify_python.py index 49ec084..09c0566 100755 --- a/tools/amd_build/pyHIPIFY/hipify_python.py +++ b/tools/amd_build/pyHIPIFY/hipify_python.py @@ -205,7 +205,7 @@ class disablefuncmode(Enum): EMPTYBODY = 6 -def matched_files_iter(root_path, includes=('*',), ignores=(), extensions=(), hipify_caffe2=False): +def matched_files_iter(root_path, includes=('*',), ignores=(), extensions=(), out_of_place_only=False): def _fnmatch(filepath, patterns): return any(fnmatch.fnmatch(filepath, pattern) for pattern in patterns) @@ -231,9 +231,10 @@ def matched_files_iter(root_path, includes=('*',), ignores=(), extensions=(), hi for filename in filenames: filepath = os.path.join(rel_dirpath, filename) if _fnmatch(filepath, includes) and (not _fnmatch(filepath, ignores)) and match_extensions(filepath): - if hipify_caffe2 and not is_caffe2_gpu_file(filepath): + if not is_pytorch_file(filepath) and not is_caffe2_gpu_file(filepath): + continue + if out_of_place_only and not is_out_of_place(filepath): continue - yield filepath @@ -766,7 +767,11 @@ def get_hip_file_path(filepath): return os.path.join(dirpath, root + ext) -# Keep this synchronized with includes/ignores in build_pytorch_amd.py +def is_out_of_place(filepath): + return not is_pytorch_file(filepath) + + +# Keep this synchronized with includes/ignores in build_amd.py def is_pytorch_file(filepath): if filepath.startswith("aten/"): if filepath.startswith("aten/src/ATen/core/"): @@ -870,7 +875,26 @@ def fix_static_global_kernels(in_txt): return in_txt -def get_kernel_template_params(output_directory, the_file, KernelDictionary, template_param_to_value): +# Note [PyTorch and Caffe2 kernel name clobber] +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# For some reason, the static_cast logic in pyHIPIFY assumes all kernels +# have unique names. This may be true internally within PyTorch and +# Caffe2, but it is not true across PyTorch and Caffe2. The metadata +# in these cases clobbers each other. +# +# To prevent this happening, KernelTemplateParams is distinguished +# by a boolean saying if it is a PyTorch kernel or a Caffe2 kernel. +# We can't do a more fine-grained distinction, e.g., the filename, +# because we need to work on the kernel from files distinct from +# the one they were originally defined in (that's why this is done +# in two passes). +# +# We can soon kill static_cast handling entirely, as hcc will support +# this properly. So don't bother refactoring this code; it will +# get deleted soon. + + +def get_kernel_template_params(output_directory, the_file, KernelTemplateParams, template_param_to_value): """Scan for __global__ kernel definitions then extract its argument types, and static cast as necessary""" # Read the kernel file. with openf(os.path.join(output_directory, the_file), "r") as f: @@ -938,7 +962,8 @@ def get_kernel_template_params(output_directory, the_file, KernelDictionary, tem for idx, arg_type in enumerate(argument_types): formatted_args[idx] = arg_type - KernelDictionary[kernel_name] = {"kernel_with_template": kernel_with_template, "arg_types": formatted_args} + # See Note [PyTorch and Caffe2 kernel name clobber] + KernelTemplateParams[(is_pytorch_file(the_file), kernel_name)] = {"kernel_with_template": kernel_with_template, "arg_types": formatted_args} # Extract generated kernels # curandStateMtgp32 *state, int size, T *result, ARG1 @@ -954,7 +979,8 @@ def get_kernel_template_params(output_directory, the_file, KernelDictionary, tem kernel_args = {1: "int", 2: "{0} *".format(kernel_params[0]), 3: kernel_params[1], 4: kernel_params[2]} # Argument at position 1 should be int - KernelDictionary[kernel_name] = {"kernel_with_template": kernel_name, "arg_types": kernel_args} + # See Note [PyTorch and Caffe2 kernel name clobber] + KernelTemplateParams[(is_pytorch_file(the_file), kernel_name)] = {"kernel_with_template": kernel_name, "arg_types": kernel_args} def disable_unsupported_function_call(function, input_string, replacement): @@ -1055,7 +1081,7 @@ def extract_arguments(start, string): # Add static_cast to ensure that the type of kernel arguments matches that in the corresponding kernel definition -def add_static_casts(filepath, KernelTemplateParams): +def add_static_casts(orig_filepath, filepath, KernelTemplateParams): """Add static casts to kernel launches in order to keep launch argument types and kernel definition types matching. Example: @@ -1083,11 +1109,13 @@ def add_static_casts(filepath, KernelTemplateParams): original_kernel_name_with_template = argument_strings[0].strip() kernel_name = original_kernel_name_with_template.split("<")[0].strip() ignore = ["upscale"] - if kernel_name in KernelTemplateParams and kernel_name not in ignore: + if (is_pytorch_file(orig_filepath), kernel_name) in KernelTemplateParams and kernel_name not in ignore: # Add template to the kernel # Add static_casts to relevant arguments - kernel_name_with_template = KernelTemplateParams[kernel_name]["kernel_with_template"] - argument_types = KernelTemplateParams[kernel_name]["arg_types"] + # See Note [PyTorch and Caffe2 kernel name clobber] + params = KernelTemplateParams[(is_pytorch_file(orig_filepath), kernel_name)] + kernel_name_with_template = params["kernel_with_template"] + argument_types = params["arg_types"] # The first 5 arguments are simply (function, number blocks, dimension blocks, shared memory, stream) # old_kernel_launch_parameters - will contain the actual arguments to the function itself. @@ -1210,11 +1238,11 @@ def main(): required=False) parser.add_argument( - '--hipify_caffe2', + '--out-of-place-only', type=str2bool, default=False, - help="Whether to hipify caffe2 source", - required=False) + help="Whether to only run hipify out-of-place on source files", + required=False), parser.add_argument( '--ignores', @@ -1254,7 +1282,7 @@ def main(): includes=args.includes, json_settings=args.json_settings, add_static_casts_option=args.add_static_casts, - hipify_caffe2=args.hipify_caffe2, + out_of_place_only=args.out_of_place_only, ignores=args.ignores, show_progress=args.show_progress) @@ -1262,12 +1290,12 @@ def main(): def hipify( project_directory, show_detailed=False, - extensions=(".cu", ".cuh", ".c", ".cpp", ".h", ".in", ".hpp"), + extensions=(".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".in", ".hpp"), output_directory="", includes=(), json_settings="", add_static_casts_option=False, - hipify_caffe2=False, + out_of_place_only=False, ignores=(), show_progress=True, ): @@ -1378,7 +1406,7 @@ def hipify( all_files = list(matched_files_iter(output_directory, includes=includes, ignores=ignores, extensions=extensions, - hipify_caffe2=hipify_caffe2)) + out_of_place_only=out_of_place_only)) # Start Preprocessor preprocess( @@ -1400,6 +1428,7 @@ def hipify( # Execute the Clang Tool to Automatically add static casts for filepath in all_files: add_static_casts( + filepath, os.path.join( output_directory, get_hip_file_path(filepath)),