From: Peter Bell
Date: Mon, 23 Aug 2021 19:05:51 +0000 (-0700)
Subject: Kill THCUNN (#63429)
X-Git-Tag: accepted/tizen/8.0/unified/20231005.095509~810
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=560cd881956bbf425251d63f0ff0f9085a759447;p=platform%2Fupstream%2Fpytorch.git

Kill THCUNN (#63429)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/63429

Test Plan: Imported from OSS

Reviewed By: mruberry

Differential Revision: D30441308

Pulled By: ngimel

fbshipit-source-id: 3ae342a2f8d5c7f8827b637c4055c5d1b0a1be26
---

diff --git a/BUILD.bazel b/BUILD.bazel
index dab2275..5acbe40 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -410,21 +410,6 @@ filegroup(
 )
 
 filegroup(
-    name = "thcunn_srcs_cu",
-    srcs = [
-        "aten/src/THCUNN/BCECriterion.cu.cc",
-        "aten/src/THCUNN/ELU.cu.cc",
-        "aten/src/THCUNN/HardTanh.cu.cc",
-        "aten/src/THCUNN/LeakyReLU.cu.cc",
-        "aten/src/THCUNN/MultiMarginCriterion.cu.cc",
-        "aten/src/THCUNN/SoftMarginCriterion.cu.cc",
-        "aten/src/THCUNN/SoftPlus.cu.cc",
-        "aten/src/THCUNN/SoftShrink.cu.cc",
-        "aten/src/THCUNN/Tanh.cu.cc",
-    ],
-)
-
-filegroup(
     name = "aten_srcs_cu",
     srcs = [
         "aten/src/ATen/cuda/detail/IndexUtils.cu.cc",
@@ -573,8 +558,6 @@ cc_library(
         "aten/src/THC/**/*.cpp",
         "aten/src/THC/*.cuh",
         "aten/src/THC/generic/*.cu.cc",
-        "aten/src/THCUNN/*.cuh",
-        "aten/src/THCUNN/generic/*.cu.cc",
     ],
     exclude = [
         "aten/src/ATen/Config.h",
@@ -716,7 +699,6 @@ cu_library(
     srcs = [
         ":aten_srcs_cu",
         ":thc_srcs_cu",
-        ":thcunn_srcs_cu",
     ],
     copts = ATEN_COPTS + torch_cuda_half_options,
     visibility = ["//visibility:public"],
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7d8659a..e1a049c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -242,8 +242,7 @@ into the repo directory.
 * [aten](aten) - C++ tensor library for PyTorch (no autograd support)
   * [src](aten/src) - [README](aten/src/README.md)
     * [TH](aten/src/TH)
-      [THC](aten/src/THC)
-      [THCUNN](aten/src/THCUNN) - Legacy library code from the original
+      [THC](aten/src/THC) - Legacy library code from the original
       Torch. Try not to add things here; we're slowly porting these to
       [native](aten/src/ATen/native).
     * generic - Contains actual implementations of operators,
diff --git a/README.md b/README.md
index 53ebfb1..9b2a854 100644
--- a/README.md
+++ b/README.md
@@ -126,7 +126,7 @@ We hope you never spend hours debugging your code because of bad stack traces or
 PyTorch has minimal framework overhead. We integrate acceleration libraries
 such as [Intel MKL](https://software.intel.com/mkl) and NVIDIA ([cuDNN](https://developer.nvidia.com/cudnn),
 [NCCL](https://developer.nvidia.com/nccl)) to maximize speed.
 At the core, its CPU and GPU Tensor and neural network backends
-(TH, THC, THNN, THCUNN) are mature and have been tested for years.
+are mature and have been tested for years.
 
 Hence, PyTorch is quite fast – whether you run small or large neural networks.
diff --git a/aten/CMakeLists.txt b/aten/CMakeLists.txt
index 400b00f..7ba92a6 100644
--- a/aten/CMakeLists.txt
+++ b/aten/CMakeLists.txt
@@ -80,21 +80,14 @@ if(USE_ROCM)
   # ATen proper)
   set(AT_CUDA_ENABLED 1)
   add_subdirectory(src/THH)
-  add_subdirectory(src/THHUNN)
   message("ROCm is enabled.")
 elseif(USE_CUDA)
   set(AT_CUDA_ENABLED 1)
   add_subdirectory(src/THC)
-  add_subdirectory(src/THCUNN)
 else()
   message("disabling CUDA because USE_CUDA is set false")
   set(AT_CUDA_ENABLED 0)
 endif()
-if(NOT USE_CUDA)
-  # we still parse THCUNN even if cuda is disabled to make sure to
-  # install it
-  install(FILES src/THCUNN/generic/THCUNN.h DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THCUNN/generic")
-endif()
 
 if(NOT USE_NNPACK)
   set(AT_NNPACK_ENABLED 0)
diff --git a/aten/src/ATen/TensorUtils.cpp b/aten/src/ATen/TensorUtils.cpp
index af9a8a1..1ec9f9c 100644
--- a/aten/src/ATen/TensorUtils.cpp
+++ b/aten/src/ATen/TensorUtils.cpp
@@ -282,7 +282,6 @@ bool geometry_is_contiguous(IntArrayRef sizes, IntArrayRef strides) {
   return contig_if_nonempty;
 }
 
-// Correspond to THCUNN_check_dim_size/THNN_check_dim_size
 void check_dim_size(
     const Tensor& tensor,
     int64_t dim,
diff --git a/aten/src/ATen/TensorUtils.h b/aten/src/ATen/TensorUtils.h
index 8e84eca..1417174 100644
--- a/aten/src/ATen/TensorUtils.h
+++ b/aten/src/ATen/TensorUtils.h
@@ -144,7 +144,6 @@ TORCH_API void* maybe_data_ptr(const TensorArg& tensor);
 // on whether a subgeometry is contiguous.
 TORCH_API bool geometry_is_contiguous(IntArrayRef sizes, IntArrayRef strides);
 
-// Correspond to THCUNN_check_dim_size/THNN_check_dim_size
 TORCH_API void check_dim_size(
     const Tensor& tensor,
     int64_t dim,
diff --git a/aten/src/ATen/cuda/LegacyTHFunctionsCUDA.cpp b/aten/src/ATen/cuda/LegacyTHFunctionsCUDA.cpp
index 4ead51e..0ad6dc8 100644
--- a/aten/src/ATen/cuda/LegacyTHFunctionsCUDA.cpp
+++ b/aten/src/ATen/cuda/LegacyTHFunctionsCUDA.cpp
@@ -7,7 +7,6 @@
 #include
 #include
 #include
-#include
 #undef THNN_
 #undef THCIndexTensor_
 #include
diff --git a/aten/src/ATen/cuda/detail/KernelUtils.h b/aten/src/ATen/cuda/detail/KernelUtils.h
index 836504a..91a61b0 100644
--- a/aten/src/ATen/cuda/detail/KernelUtils.h
+++ b/aten/src/ATen/cuda/detail/KernelUtils.h
@@ -2,9 +2,6 @@
 
 #include
 
-// Contents of this file are copied from THCUNN/common.h for the ease of porting
-// THCUNN functions into ATen.
-
 namespace at { namespace cuda { namespace detail {
 
 // CUDA: grid stride looping
diff --git a/aten/src/README.md b/aten/src/README.md
index e3e0151..183ec09 100644
--- a/aten/src/README.md
+++ b/aten/src/README.md
@@ -7,7 +7,6 @@ multiple variants of the library, summarized here:
 * TH = TorcH
 * THC = TorcH Cuda
 * THCS = TorcH Cuda Sparse (now defunct)
-* THCUNN = TorcH CUda Neural Network (see cunn)
 * THNN = TorcH Neural Network (now defunct)
 * THS = TorcH Sparse (now defunct)
 
diff --git a/aten/src/THCUNN/CMakeLists.txt b/aten/src/THCUNN/CMakeLists.txt
deleted file mode 100644
index f84005e..0000000
--- a/aten/src/THCUNN/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS}
-PARENT_SCOPE)
-
-set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE}
-  "${CMAKE_CURRENT_SOURCE_DIR}"
-PARENT_SCOPE)
-
-install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-  DESTINATION ${ATEN_INSTALL_INCLUDE_SUBDIR}
-  FILES_MATCHING PATTERN "*.h" PATTERN "*.cuh")
diff --git a/aten/src/THCUNN/README.md b/aten/src/THCUNN/README.md
deleted file mode 100644
index 5c46623..0000000
--- a/aten/src/THCUNN/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# THCUNN
-
-THCUNN is a library that gathers nn's C implementations of neural network modules. It's entirely free of Lua dependency and therefore can be used in any application that has a C FFI. Please note that it only contains quite low level functions; most users will want to use ATen, which provides a C++ wrapper around these functions.
-
-
-Looking to add an implementation? Consider writing an ATen native function
-instead! See [../ATen/native](../ATen/native).
-
-## Links
-
-* [API reference](doc/api_reference.md)
-* [Style guidelines](doc/style_guidelines.md)
-
-## API
-
-THCUNN is a purely functional library. It provides 2-3 functions for each module, that perform the most important operations:
-
-* **updateOutput** - applies the module to an input
-* **updateGradInput** - accepts gradient w.r.t. output and previous module input, and computes a gradient w.r.t. that input
-* **accGradParameters** - *(optional, only modules with parameters)* accepts gradient w.r.t. output and previous module input, and computes gradient w.r.t. the parameters
-
-For information on argument types please check the [API reference](doc/api_reference.md).
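To make the three-function pattern above concrete, here is a minimal sketch of a caller driving a parameter-free module through forward and backward. Tanh is one of the modules this commit deletes; the `THNN_CudaTanh_*` names follow the float-CUDA name mangling described in the API reference, but the exact argument lists varied across releases, so treat the signatures (and the `THCudaTensor_new`/`THCudaTensor_free` helpers) as illustrative assumptions rather than the verbatim API:

```C
#include <THC/THC.h>        // THCState, THCudaTensor (legacy THC API)
#include <THCUNN/THCUNN.h>  // generated THNN_Cuda* declarations

// Forward and backward through Tanh using the purely functional API.
// The caller owns every tensor; updateOutput fills `output` and
// updateGradInput fills `gradInput`. There is no accGradParameters
// step because Tanh has no learnable parameters.
void tanh_forward_backward(THCState *state,
                           THCudaTensor *input,
                           THCudaTensor *gradOutput) {
  THCudaTensor *output = THCudaTensor_new(state);
  THCudaTensor *gradInput = THCudaTensor_new(state);

  // output = tanh(input)
  THNN_CudaTanh_updateOutput(state, input, output);

  // gradInput = gradOutput * (1 - output^2); tanh's backward is
  // expressed in terms of `output`, so `input` is not needed here
  // (late-era signature; earlier releases also took `input`).
  THNN_CudaTanh_updateGradInput(state, gradOutput, gradInput, output);

  THCudaTensor_free(state, output);
  THCudaTensor_free(state, gradInput);
}
```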
-
-## Developer docs
-
-* [Style guidelines](doc/style_guidelines.md)
diff --git a/aten/src/THCUNN/SharedMem.cuh b/aten/src/THCUNN/SharedMem.cuh
deleted file mode 100644
index 8d83d9f..0000000
--- a/aten/src/THCUNN/SharedMem.cuh
+++ /dev/null
@@ -1,43 +0,0 @@
-// Based on the simpleTempltes CUDA example
-
-#ifndef THCUNN_SHAREDMEM_H
-#define THCUNN_SHAREDMEM_H
-
-template <typename T>
-struct SharedMem {
-  __device__ T *getPointer()
-  {
-    extern __device__ void error(void);
-    error();
-    return NULL;
-  }
-};
-
-template <>
-struct SharedMem<half>
-{
-  __device__ half *getPointer() {
-    extern __shared__ half s_half[];
-    return s_half;
-  }
-};
-
-template <>
-struct SharedMem<float>
-{
-  __device__ float *getPointer() {
-    extern __shared__ float s_float[];
-    return s_float;
-  }
-};
-
-template <>
-struct SharedMem<double>
-{
-  __device__ double *getPointer() {
-    extern __shared__ double s_double[];
-    return s_double;
-  }
-};
-
-#endif
diff --git a/aten/src/THCUNN/THCHalfAutoNumerics.cuh b/aten/src/THCUNN/THCHalfAutoNumerics.cuh
deleted file mode 100644
index 62691b9..0000000
--- a/aten/src/THCUNN/THCHalfAutoNumerics.cuh
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef THC_HALF_AUTO_NUMERICS_INC
-#define THC_HALF_AUTO_NUMERICS_INC
-
-#include
-#include
-
-// WARNING: THCNumerics is being deprecated. Read the comments and function usage
-// in THCNumerics to learn about the deprecation
-//
-// Half numerics functions defined as free functions, so cunn code can be
-// written generically, i.e. without excessive calling of THCNumerics functions.
-
-// these functions should move to THCNumerics
-
-inline __host__ __device__ THHalf fmaxType(THHalf x, THHalf y) {
-  return THCNumerics<THHalf>::ge(x, y) ? x : y;
-}
-
-inline __host__ __device__ float fmaxType(float x, THHalf y) {
-  return fmaxf(x, ScalarConvert<THHalf, float>::to(y));
-}
-
-inline __host__ __device__ float fmaxType(float x, float y) {
-  return fmaxf(x, y);
-}
-
-inline __host__ __device__ double fmaxType(double x, double y) {
-  return fmax(x, y);
-}
-
-
-// arithmetic functions
-
-inline __host__ __device__ THHalf pow(THHalf a, THHalf b) {
-  return THCNumerics<THHalf>::pow(a, b);
-}
-
-#endif
diff --git a/aten/src/THCUNN/THCUNN.h b/aten/src/THCUNN/THCUNN.h
deleted file mode 100644
index a4392dd..0000000
--- a/aten/src/THCUNN/THCUNN.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#include
-
-#define THCIndexTensor THCudaLongTensor
-#define THCIndexTensor_(NAME) THCudaLongTensor_ ## NAME
-typedef int64_t THCIndex_t;
-
-#define THNN_(NAME) TH_CONCAT_3(THNN_, CReal, NAME)
-
-#include
-#include
-
-#include
-#include
diff --git a/aten/src/THCUNN/common.h b/aten/src/THCUNN/common.h
deleted file mode 100644
index 69b7f3a..0000000
--- a/aten/src/THCUNN/common.h
+++ /dev/null
@@ -1,83 +0,0 @@
-#ifndef THCUNN_COMMON_H
-#define THCUNN_COMMON_H
-
-#define THCUNN_assertSameGPU(...) THAssertMsg(THCTensor_(checkGPU)(__VA_ARGS__), \
-  "Some of weight/gradient/input tensors are located on different GPUs. Please move them to a single one.")
-
-// Use 1024 threads per block, which requires cuda sm_2x or above
-const int CUDA_NUM_THREADS = 1024;
-
-// CUDA: number of blocks for threads.
-inline int GET_BLOCKS(const int64_t N)
-{
-  // Round up division for positive number
-  auto block_num = N / CUDA_NUM_THREADS + (N % CUDA_NUM_THREADS == 0 ? 0 : 1);
-
-  constexpr int64_t max_int = std::numeric_limits<int>::max();
-  THAssertMsg(block_num <= max_int, "Can't schedule too many blocks on CUDA device");
-
-  return static_cast<int>(block_num);
-}
-
-#define THCUNN_resizeAs_indices(STATE, I1, I2) \
-  if (!I1->sizes().equals(I2->sizes())) \
-  { \
-    THCudaLongTensor_resizeAs(STATE, I1, I2); \
-  }
-
-#define THCUNN_check_shape(STATE, I1, I2) \
-  if (I1 != NULL && I2 != NULL && !THCTensor_(isSameSizeAs)(STATE, I1, I2)) \
-  { \
-    THCDescBuff s1 = THCTensor_(sizeDesc)(STATE, I1); \
-    THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2); \
-    THError(#I1 " and " #I2 " shapes do not match: " \
-            #I1 " %s, " #I2 " %s", s1.str, s2.str); \
-  }
-
-
-#define THCUNN_check_shape_indices(STATE, I1, I2) \
-  if (!I1->sizes().equals(I2->sizes())) \
-  { \
-    THCDescBuff s1 = THCIndexTensor_(sizeDesc)(STATE, I1); \
-    THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2); \
-    THError(#I1 " and " #I2 " shapes do not match: " \
-            #I1 " %s, " #I2 " %s", s1.str, s2.str); \
-  }
-
-#define THCUNN_check_nElement(STATE, I1, I2) \
-  if (I1 != NULL && I2 != NULL ) { \
-    ptrdiff_t n1 = THCTensor_(nElement)(STATE, I1); \
-    ptrdiff_t n2 = THCTensor_(nElement)(STATE, I2); \
-    if (n1 != n2) \
-    { \
-      THCDescBuff s1 = THCTensor_(sizeDesc)(state, I1); \
-      THCDescBuff s2 = THCTensor_(sizeDesc)(state, I2); \
-      THError(#I1 " and " #I2 " have different number of elements: " \
-              #I1 "%s has %ld elements, while " \
-              #I2 "%s has %ld elements", s1.str, n1, s2.str, n2); \
-    } \
-  }
-
-#define THCUNN_check_dim_size(STATE, T, DIM, DIM_SIZE, SIZE) \
-  if (THCTensor_(nDimensionLegacyNoScalars)(STATE, T) != DIM || \
-      THCTensor_(sizeLegacyNoScalars)(STATE, T, DIM_SIZE) != SIZE) { \
-    THCDescBuff s1 = THCTensor_(sizeDesc)(state, T); \
-    THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \
-            " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
-  }
-
-#define THCUNN_check_dim_size_indices(STATE, T, DIM, DIM_SIZE, SIZE) \
-  if (THCIndexTensor_(nDimensionLegacyNoScalars)(STATE, T) != DIM || \
-      THCIndexTensor_(sizeLegacyNoScalars)(STATE, T, DIM_SIZE) != SIZE) { \
-    THCDescBuff s1 = THCIndexTensor_(sizeDesc)(state, T); \
-    THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \
-            " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
-  }
-
-#define THCUNN_argCheck(STATE, COND, ARG, T, FORMAT) \
-  if (!(COND)) { \
-    THCDescBuff s1 = THCTensor_(sizeDesc)(state, T); \
-    THArgCheck(COND, ARG, FORMAT, s1.str); \
-  }
-
-#endif
diff --git a/aten/src/THCUNN/doc/api_reference.md b/aten/src/THCUNN/doc/api_reference.md
deleted file mode 100644
index 3f49b9b..0000000
--- a/aten/src/THCUNN/doc/api_reference.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# API docs
-
-This document describes the conventions behind the THCUNN API.
-
-### The API
-
-All functions provided by THCUNN are stored in `aten/src/THCUNN/generic/THCUNN.h`.
-Look at this file.
-
-### Note on function names
-
-Please remember, that because C doesn't support function overloading, functions taking different tensor types have different names. So e.g. for an Abs module, there are actually two updateOutput functions:
-
-* `void THNN_FloatAbs_updateOutput(...)`
-* `void THNN_DoubleAbs_updateOutput(...)`
-
-In these docs such function will be referred to as `void THCUNN_Abs_updateOutput(...)`, and it's up to developer to add a type prefix. `real` is an alias for that type.
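The type prefix is pasted in by the C preprocessor rather than written by hand. A minimal sketch of the token pasting involved is below; the `TH_CONCAT_3` and `THNN_(NAME)` definitions match what appears in `THCUNN.h` above, while pinning `CReal` with a plain `#define` is a simplification — in the real tree the generic-type headers define and undefine it around each re-inclusion of `generic/THCUNN.h` (and in the CUDA build the tags are Cuda-prefixed, e.g. `Cuda` for float):

```C
// Two-step concatenation so macro arguments (like CReal) are expanded
// before the ## pasting happens.
#define TH_CONCAT_3_EXPAND(x, y, z) x ## y ## z
#define TH_CONCAT_3(x, y, z) TH_CONCAT_3_EXPAND(x, y, z)

#define CReal Float  /* simplification: normally set per type instantiation */
#define THNN_(NAME) TH_CONCAT_3(THNN_, CReal, NAME)

// Expands to: void THNN_FloatAbs_updateOutput(void);
// matching the first of the two names listed above. Redefining CReal to
// Double before re-including the generic header yields the second.
// (The real declarations take state/tensor arguments, not void.)
void THNN_(Abs_updateOutput)(void);
```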
-
-### Argument types
-
-Some arguments have additional tags placed in square brackets in their header declarations:
-
-* **[OUT]** - This is the output argument. It will be reshaped if needed.
-* **[OPTIONAL]** - This argument is optional and can be safely set to NULL
-* **[BUFFER]** - A buffer. `updateGradInput` and `accGradParameters` should get the same buffers that were used in `updateOutput` call.
-* **[MODIFIED]** - Some functions accept an `inplace` flag. If set to true, this argument might be modified (in addition to the output).
diff --git a/aten/src/THCUNN/doc/style_guidelines.md b/aten/src/THCUNN/doc/style_guidelines.md
deleted file mode 100644
index 086db8b..0000000
--- a/aten/src/THCUNN/doc/style_guidelines.md
+++ /dev/null
@@ -1,64 +0,0 @@
-## API design guidelines
-
-Functions should return `void`.
-
-All functions should accept arguments in the following order. `...` represent any module-specific parameters or buffers, disregarding whether they are used for writing or reading. Arguments in `...` below should be ordered like this:
-```
-[weight], [bias], [any buffers], [additional arguments], [optional arguments]
-```
-
-### Modules
-```
-updateOutput: state, input, output, ...
-updateGradInput: state, input, gradOutput, gradInput, ...
-accGradParameters: state, input, gradOutput, [gradWeight], [gradBias], ...
-```
-
-e.g.
-```C
-void THNN_(ClassNLLCriterion_updateGradInput)(
-          THCState *state,
-          THCTensor *input,
-          THCIndexTensor *target,
-          THCTensor *gradOutput,
-          THCTensor *gradInput,
-          int64_t reduction,
-          THCTensor *weights,
-          THCTensor *total_weight,
-          int64_t ignore_index)
-```
-
-### Criterions
-```
-updateOutput: state, input, target, output, ...
-updateGradInput: state, input, target, gradInput, ...
-```
-
-e.g.
-
-```C
-void THNN_(ClassNLLCriterion_updateOutput)(
-          THCState *state,
-          THCTensor *input,
-          THCIndexTensor *target,
-          THCTensor *output,
-          int64_t reduction,
-          THCTensor *weights,
-          THCTensor *total_weight,
-          int64_t ignore_index)
-```
-
-## Code style guide
-
-```C
-void THNN_(GatedLinear_updateOutput)(
-          THCState *state,
-          THCTensor *input,
-          THCTensor *output,
-          int dim)
-//<- 10 ->
-```
-
-All arguments should start on a new line after function name, and they should be indented using 10 spaces.
-
-Use 2 spaces for block indentation.
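Applying both the module argument ordering and the formatting rules above, declarations for a simple module with one extra scalar argument would look like the following sketch. SoftShrink is among the modules this commit deletes, but the exact historical signatures may have differed slightly (e.g. the scalar's type), so read this as an illustration of the conventions rather than the canonical header:

```C
// updateOutput: state, input, output first, then the module-specific
// scalar last, each argument on its own line indented by 10 spaces.
void THNN_(SoftShrink_updateOutput)(
          THCState *state,
          THCTensor *input,
          THCTensor *output,
          accreal lambda);

// updateGradInput: state, input, gradOutput, gradInput, then the same
// trailing module-specific argument.
void THNN_(SoftShrink_updateGradInput)(
          THCState *state,
          THCTensor *input,
          THCTensor *gradOutput,
          THCTensor *gradInput,
          accreal lambda);
```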
diff --git a/aten/src/THCUNN/generic/THCUNN.h b/aten/src/THCUNN/generic/THCUNN.h
deleted file mode 100644
index d624fdd..0000000
--- a/aten/src/THCUNN/generic/THCUNN.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "THCUNN/generic/THCUNN.h"
-#else
-
-#include
-#include
-
-TORCH_CUDA_CU_API void THNN_(MultiMarginCriterion_updateOutput)(
-    THCState* state,
-    THCTensor* input,
-    THCIndexTensor* target,
-    THCTensor* output,
-    int64_t reduction,
-    int p,
-    THCTensor* weights, // [OPTIONAL]
-    accreal margin);
-
-TORCH_CUDA_CU_API void THNN_(MultiMarginCriterion_updateGradInput)(
-    THCState* state,
-    THCTensor* input,
-    THCIndexTensor* target,
-    THCTensor* gradOutput,
-    THCTensor* gradInput,
-    int64_t reduction,
-    int p,
-    THCTensor* weights, // [OPTIONAL]
-    accreal margin);
-
-#endif
diff --git a/setup.py b/setup.py
index 8135e1e..a200982 100644
--- a/setup.py
+++ b/setup.py
@@ -1028,8 +1028,6 @@ if __name__ == '__main__':
         'include/THC/*.cuh',
         'include/THC/*.h*',
         'include/THC/generic/*.h',
-        'include/THCUNN/*.cuh',
-        'include/THCUNN/generic/*.h',
         'include/THH/*.cuh',
         'include/THH/*.h*',
         'include/THH/generic/*.h',
diff --git a/tools/README.md b/tools/README.md
index a28affa..e4aba38 100644
--- a/tools/README.md
+++ b/tools/README.md
@@ -15,10 +15,6 @@ Modern infrastructure:
   to import arbitrary Python files in a script, without having to add them to the
   PYTHONPATH first.
 
-Legacy infrastructure (we should kill this):
-* [cwrap](cwrap) - Implementation of legacy code generation for THNN/THCUNN.
-  This is used by nnwrap.
-
 Build system pieces:
 
 * [setup_helpers](setup_helpers) - Helper code for searching for
diff --git a/tools/amd_build/build_amd.py b/tools/amd_build/build_amd.py
index 8cfecda..70f7e7a 100755
--- a/tools/amd_build/build_amd.py
+++ b/tools/amd_build/build_amd.py
@@ -81,12 +81,10 @@ includes = [
    "aten/src/ATen/native/sparse/cuda/*",
    "aten/src/ATen/native/quantized/cuda/*",
    "aten/src/THC/*",
-   "aten/src/THCUNN/*",
    "aten/src/ATen/test/*",
    # CMakeLists.txt isn't processed by default, but there are a few
    # we do want to handle, so explicitly specify them
    "aten/src/THC/CMakeLists.txt",
-   "aten/src/THCUNN/CMakeLists.txt",
    "torch/*",
    "tools/autograd/templates/python_variable_methods.cpp",
 ]
diff --git a/torch/utils/hipify/hipify_python.py b/torch/utils/hipify/hipify_python.py
index 6697f1e..ad2903f 100644
--- a/torch/utils/hipify/hipify_python.py
+++ b/torch/utils/hipify/hipify_python.py
@@ -750,7 +750,6 @@ def preprocessor(
         or f.startswith("ATen/native/quantized/cuda")
         or f.startswith("ATen/native/sparse/cuda")
         or f.startswith("THC/")
-        or f.startswith("THCUNN/")
         or (f.startswith("THC") and not f.startswith("THCP"))
     ):
         return templ.format(get_hip_file_path(m.group(1), is_pytorch_extension))