CMAKE_ARGS+=("-DUSE_LMDB=ON")
########## HIPIFY Caffe2 operators
- ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_pytorch_amd.py"
- ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_caffe2_amd.py"
+ ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_amd.py"
fi
# building bundled nccl in this config triggers a bug in nvlink. For
export PATH="$CACHE_WRAPPER_DIR:$PATH"
fi
- python tools/amd_build/build_pytorch_amd.py
- python tools/amd_build/build_caffe2_amd.py
+ python tools/amd_build/build_amd.py
# OPENCV is needed to enable ImageInput operator in caffe2 resnet5_trainer
# LMDB is needed to read datasets from https://download.caffe2.ai/databases/resnet_trainer.zip
USE_ROCM=1 USE_LMDB=1 USE_OPENCV=1 python setup.py install --user
file(GLOB_RECURSE C10_HIP_HEADERS *.h)
hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS})
-# Propagate HIP_HCC_FLAGS that were set from Dependencies.cmake
-target_compile_options(c10_hip PRIVATE ${HIP_HCC_FLAGS})
+# Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
+target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})
# caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
# minimal. I'm not sure if we need hip_hcc or not; for now leave it out
into AMD HIP. Right now, PyTorch and Caffe2 share logic for how to
do this transpilation, but have separate entry-points for transpiling
either PyTorch or Caffe2 code.
- * [build_caffe2_amd.py](amd_build/build_caffe2_amd.py) - Script
- for HIPifying the Caffe2 codebase.
- * [build_pytorch_amd.py](amd_build/build_pytorch_amd.py) - Script
- for HIPifying the PyTorch codebase.
+ * [build_amd.py](amd_build/build_amd.py) - Top-level entry
+ point for HIPifying our codebase.
Tools which are only situationally useful:
--- /dev/null
+#!/usr/bin/env python
+# Top-level entry point for HIPifying the PyTorch and Caffe2 codebases
+# (replaces the former build_pytorch_amd.py / build_caffe2_amd.py pair).
+
+from __future__ import absolute_import, division, print_function
+import os
+import sys
+import subprocess
+import argparse
+
+from pyHIPIFY import hipify_python
+
+parser = argparse.ArgumentParser(description='Top-level script for HIPifying, filling in most common parameters')
+parser.add_argument(
+    '--out-of-place-only',
+    action='store_true',
+    help="Whether to only run hipify out-of-place on source files")
+args = parser.parse_args()
+
+amd_build_dir = os.path.dirname(os.path.realpath(__file__))
+# This script lives in <proj>/tools/amd_build, so the project root is two levels up.
+proj_dir = os.path.dirname(os.path.dirname(amd_build_dir))
+
+includes = [
+    "caffe2/operators/*",
+    "caffe2/sgd/*",
+    "caffe2/image/*",
+    "caffe2/transforms/*",
+    "caffe2/video/*",
+    "caffe2/distributed/*",
+    "caffe2/queue/*",
+    "binaries/*",
+    "caffe2/**/*_test*",
+    "caffe2/core/*",
+    "caffe2/db/*",
+    "caffe2/utils/*",
+    "c10/cuda/*",
+    # PyTorch paths
+    # Keep this synchronized with is_pytorch_file in hipify_python.py
+    "aten/*",
+    "torch/*",
+]
+
+ignores = [
+    "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
+    "caffe2/operators/pool_op_cudnn.cu",
+    '**/hip/**',
+    "aten/src/ATen/core/*",
+]
+
+json_file = ""  # hipify treats the empty string as "no disabled-features settings"
+if not args.out_of_place_only:
+    # List of operators currently disabled (PyTorch only)
+    json_file = os.path.join(amd_build_dir, "disabled_features.json")
+
+    # Apply patch files in place (PyTorch only).  check_call (rather than a
+    # fire-and-forget Popen) waits for each `git apply` to finish and raises
+    # if it fails, so we never start hipifying a half-patched tree.
+    patch_folder = os.path.join(amd_build_dir, "patches")
+    for filename in os.listdir(patch_folder):
+        subprocess.check_call(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir)
+
+    # Make various replacements inside AMD_BUILD/torch directory
+    ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp",
+                    "csrc/cuda/cuda_check.h"]
+    for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")):
+        for filename in files:
+            if filename.endswith(".cpp") or filename.endswith(".h"):
+                source = os.path.join(root, filename)
+                # Disabled files
+                if any(source.endswith(exclude) for exclude in ignore_files):
+                    continue
+                # Rewrite CUDA guards so the ROCm build takes the right paths;
+                # write back in place and fsync so later build steps see it.
+                with open(source, "r+") as f:
+                    contents = f.read()
+                    contents = contents.replace("USE_CUDA", "USE_ROCM")
+                    contents = contents.replace("CUDA_VERSION", "0")
+                    f.seek(0)
+                    f.write(contents)
+                    f.truncate()
+                    f.flush()
+                    os.fsync(f)
+
+hipify_python.hipify(
+    project_directory=proj_dir,
+    output_directory=proj_dir,
+    includes=includes,
+    ignores=ignores,
+    out_of_place_only=args.out_of_place_only,
+    json_settings=json_file,
+    add_static_casts_option=True)
+++ /dev/null
-#!/usr/bin/env python
-
-from __future__ import absolute_import, division, print_function
-import os
-import sys
-
-from pyHIPIFY import hipify_python
-
-amd_build_dir = os.path.dirname(os.path.realpath(__file__))
-proj_dir = os.path.join(os.path.dirname(os.path.dirname(amd_build_dir)))
-
-includes = [
- "caffe2/operators/*",
- "caffe2/sgd/*",
- "caffe2/image/*",
- "caffe2/transforms/*",
- "caffe2/video/*",
- "caffe2/distributed/*",
- "caffe2/queue/*",
- "binaries/*",
- "caffe2/**/*_test*",
- "caffe2/core/*",
- "caffe2/db/*",
- "caffe2/utils/*",
- "c10/cuda/*",
-]
-
-ignores = [
- "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
- "caffe2/operators/pool_op_cudnn.cu",
- '**/hip/**',
-]
-
-file_extensions = ['.cc', '.cu', '.h', '.cuh', '.in']
-
-hipify_python.hipify(
- project_directory=proj_dir,
- output_directory=proj_dir,
- includes=includes,
- extensions=file_extensions,
- ignores=ignores,
- hipify_caffe2=True,
- add_static_casts_option=True)
+++ /dev/null
-from __future__ import absolute_import, division, print_function
-
-import os
-import subprocess
-import sys
-from functools import reduce
-
-from pyHIPIFY import hipify_python
-
-amd_build_dir = os.path.dirname(os.path.realpath(__file__))
-proj_dir = os.path.dirname(os.path.dirname(amd_build_dir))
-
-# Keep this synchronized with is_pytorch_file in hipify_python.py
-includes = [
- "aten/*",
- "torch/*",
-]
-
-ignores = [
- "aten/src/ATen/core/*",
-]
-
-# List of operators currently disabled
-json_file = os.path.join(amd_build_dir, "disabled_features.json")
-
-# Apply patch files in place.
-patch_folder = os.path.join(amd_build_dir, "patches")
-for filename in os.listdir(os.path.join(amd_build_dir, "patches")):
- subprocess.Popen(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir)
-
-# Make various replacements inside AMD_BUILD/torch directory
-ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp",
- "csrc/cuda/cuda_check.h"]
-for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")):
- for filename in files:
- if filename.endswith(".cpp") or filename.endswith(".h"):
- source = os.path.join(root, filename)
- # Disabled files
- if reduce(lambda result, exclude: source.endswith(exclude) or result, ignore_files, False):
- continue
- # Update contents.
- with open(source, "r+") as f:
- contents = f.read()
- contents = contents.replace("USE_CUDA", "USE_ROCM")
- contents = contents.replace("CUDA_VERSION", "0")
- f.seek(0)
- f.write(contents)
- f.truncate()
- f.flush()
- os.fsync(f)
-
-hipify_python.hipify(
- project_directory=proj_dir,
- output_directory=proj_dir,
- includes=includes,
- ignores=ignores,
- json_settings=json_file,
- add_static_casts_option=True,
- show_progress=False)
("c10/cuda/CUDAMacros.h", ("c10/hip/HIPMacros.h", API_C10)),
("c10/cuda/CUDAMathCompat.h", ("c10/hip/HIPMathCompat.h", API_C10)),
("c10/cuda/CUDAFunctions.h", ("c10/hip/HIPFunctions.h", API_C10)),
+ ("c10/cuda/impl/CUDATest.h", ("c10/hip/impl/HIPTest.h", API_C10)),
+ ("c10/cuda/impl/cuda_cmake_macros.h", ("c10/hip/impl/hip_cmake_macros.h", API_C10)),
("C10_CUDA_CHECK", ("C10_HIP_CHECK", API_C10)),
("c10::cuda", ("c10::hip", API_C10)),
# This substitution is not permissible, because there's another copy of this
EMPTYBODY = 6
-def matched_files_iter(root_path, includes=('*',), ignores=(), extensions=(), hipify_caffe2=False):
+def matched_files_iter(root_path, includes=('*',), ignores=(), extensions=(), out_of_place_only=False):
def _fnmatch(filepath, patterns):
return any(fnmatch.fnmatch(filepath, pattern) for pattern in patterns)
for filename in filenames:
filepath = os.path.join(rel_dirpath, filename)
if _fnmatch(filepath, includes) and (not _fnmatch(filepath, ignores)) and match_extensions(filepath):
- if hipify_caffe2 and not is_caffe2_gpu_file(filepath):
+ if not is_pytorch_file(filepath) and not is_caffe2_gpu_file(filepath):
+ continue
+ if out_of_place_only and not is_out_of_place(filepath):
continue
-
yield filepath
return os.path.join(dirpath, root + ext)
-# Keep this synchronized with includes/ignores in build_pytorch_amd.py
+def is_out_of_place(filepath):
+ return not is_pytorch_file(filepath)
+
+
+# Keep this synchronized with includes/ignores in build_amd.py
def is_pytorch_file(filepath):
if filepath.startswith("aten/"):
if filepath.startswith("aten/src/ATen/core/"):
return in_txt
-def get_kernel_template_params(output_directory, the_file, KernelDictionary, template_param_to_value):
+# Note [PyTorch and Caffe2 kernel name clobber]
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# For some reason, the static_cast logic in pyHIPIFY assumes all kernels
+# have unique names. This may be true internally within PyTorch and
+# Caffe2, but it is not true across PyTorch and Caffe2. The metadata
+# in these cases clobbers each other.
+#
+# To prevent this happening, KernelTemplateParams is distinguished
+# by a boolean saying if it is a PyTorch kernel or a Caffe2 kernel.
+# We can't do a more fine-grained distinction, e.g., the filename,
+# because we need to work on the kernel from files distinct from
+# the one they were originally defined in (that's why this is done
+# in two passes).
+#
+# We can soon kill static_cast handling entirely, as hcc will support
+# this properly. So don't bother refactoring this code; it will
+# get deleted soon.
+
+
+def get_kernel_template_params(output_directory, the_file, KernelTemplateParams, template_param_to_value):
"""Scan for __global__ kernel definitions then extract its argument types, and static cast as necessary"""
# Read the kernel file.
with openf(os.path.join(output_directory, the_file), "r") as f:
for idx, arg_type in enumerate(argument_types):
formatted_args[idx] = arg_type
- KernelDictionary[kernel_name] = {"kernel_with_template": kernel_with_template, "arg_types": formatted_args}
+ # See Note [PyTorch and Caffe2 kernel name clobber]
+ KernelTemplateParams[(is_pytorch_file(the_file), kernel_name)] = {"kernel_with_template": kernel_with_template, "arg_types": formatted_args}
# Extract generated kernels
# curandStateMtgp32 *state, int size, T *result, ARG1
kernel_args = {1: "int", 2: "{0} *".format(kernel_params[0]), 3: kernel_params[1], 4: kernel_params[2]}
# Argument at position 1 should be int
- KernelDictionary[kernel_name] = {"kernel_with_template": kernel_name, "arg_types": kernel_args}
+ # See Note [PyTorch and Caffe2 kernel name clobber]
+ KernelTemplateParams[(is_pytorch_file(the_file), kernel_name)] = {"kernel_with_template": kernel_name, "arg_types": kernel_args}
def disable_unsupported_function_call(function, input_string, replacement):
# Add static_cast to ensure that the type of kernel arguments matches that in the corresponding kernel definition
-def add_static_casts(filepath, KernelTemplateParams):
+def add_static_casts(orig_filepath, filepath, KernelTemplateParams):
"""Add static casts to kernel launches in order to keep launch argument types and kernel definition types matching.
Example:
original_kernel_name_with_template = argument_strings[0].strip()
kernel_name = original_kernel_name_with_template.split("<")[0].strip()
ignore = ["upscale"]
- if kernel_name in KernelTemplateParams and kernel_name not in ignore:
+ if (is_pytorch_file(orig_filepath), kernel_name) in KernelTemplateParams and kernel_name not in ignore:
# Add template to the kernel
# Add static_casts to relevant arguments
- kernel_name_with_template = KernelTemplateParams[kernel_name]["kernel_with_template"]
- argument_types = KernelTemplateParams[kernel_name]["arg_types"]
+ # See Note [PyTorch and Caffe2 kernel name clobber]
+ params = KernelTemplateParams[(is_pytorch_file(orig_filepath), kernel_name)]
+ kernel_name_with_template = params["kernel_with_template"]
+ argument_types = params["arg_types"]
# The first 5 arguments are simply (function, number blocks, dimension blocks, shared memory, stream)
# old_kernel_launch_parameters - will contain the actual arguments to the function itself.
required=False)
parser.add_argument(
- '--hipify_caffe2',
+ '--out-of-place-only',
type=str2bool,
default=False,
- help="Whether to hipify caffe2 source",
- required=False)
+ help="Whether to only run hipify out-of-place on source files",
+ required=False)
parser.add_argument(
'--ignores',
includes=args.includes,
json_settings=args.json_settings,
add_static_casts_option=args.add_static_casts,
- hipify_caffe2=args.hipify_caffe2,
+ out_of_place_only=args.out_of_place_only,
ignores=args.ignores,
show_progress=args.show_progress)
def hipify(
project_directory,
show_detailed=False,
- extensions=(".cu", ".cuh", ".c", ".cpp", ".h", ".in", ".hpp"),
+ extensions=(".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".in", ".hpp"),
output_directory="",
includes=(),
json_settings="",
add_static_casts_option=False,
- hipify_caffe2=False,
+ out_of_place_only=False,
ignores=(),
show_progress=True,
):
all_files = list(matched_files_iter(output_directory, includes=includes,
ignores=ignores, extensions=extensions,
- hipify_caffe2=hipify_caffe2))
+ out_of_place_only=out_of_place_only))
# Start Preprocessor
preprocess(
# Execute the Clang Tool to Automatically add static casts
for filepath in all_files:
add_static_casts(
+ filepath,
os.path.join(
output_directory,
get_hip_file_path(filepath)),