)
filegroup(
- name = "thcunn_srcs_cu",
- srcs = [
- "aten/src/THCUNN/BCECriterion.cu.cc",
- "aten/src/THCUNN/ELU.cu.cc",
- "aten/src/THCUNN/HardTanh.cu.cc",
- "aten/src/THCUNN/LeakyReLU.cu.cc",
- "aten/src/THCUNN/MultiMarginCriterion.cu.cc",
- "aten/src/THCUNN/SoftMarginCriterion.cu.cc",
- "aten/src/THCUNN/SoftPlus.cu.cc",
- "aten/src/THCUNN/SoftShrink.cu.cc",
- "aten/src/THCUNN/Tanh.cu.cc",
- ],
-)
-
-filegroup(
name = "aten_srcs_cu",
srcs = [
"aten/src/ATen/cuda/detail/IndexUtils.cu.cc",
"aten/src/THC/**/*.cpp",
"aten/src/THC/*.cuh",
"aten/src/THC/generic/*.cu.cc",
- "aten/src/THCUNN/*.cuh",
- "aten/src/THCUNN/generic/*.cu.cc",
],
exclude = [
"aten/src/ATen/Config.h",
srcs = [
":aten_srcs_cu",
":thc_srcs_cu",
- ":thcunn_srcs_cu",
],
copts = ATEN_COPTS + torch_cuda_half_options,
visibility = ["//visibility:public"],
* [aten](aten) - C++ tensor library for PyTorch (no autograd support)
* [src](aten/src) - [README](aten/src/README.md)
* [TH](aten/src/TH)
- [THC](aten/src/THC)
- [THCUNN](aten/src/THCUNN) - Legacy library code from the original
+ [THC](aten/src/THC) - Legacy library code from the original
Torch. Try not to add things here; we're slowly porting these to
[native](aten/src/ATen/native).
* generic - Contains actual implementations of operators,
PyTorch has minimal framework overhead. We integrate acceleration libraries
such as [Intel MKL](https://software.intel.com/mkl) and NVIDIA ([cuDNN](https://developer.nvidia.com/cudnn), [NCCL](https://developer.nvidia.com/nccl)) to maximize speed.
At the core, its CPU and GPU Tensor and neural network backends
-(TH, THC, THNN, THCUNN) are mature and have been tested for years.
+are mature and have been tested for years.
Hence, PyTorch is quite fast – whether you run small or large neural networks.
# ATen proper
set(AT_CUDA_ENABLED 1)
add_subdirectory(src/THH)
- add_subdirectory(src/THHUNN)
message("ROCm is enabled.")
elseif(USE_CUDA)
set(AT_CUDA_ENABLED 1)
add_subdirectory(src/THC)
- add_subdirectory(src/THCUNN)
else()
message("disabling CUDA because USE_CUDA is set false")
set(AT_CUDA_ENABLED 0)
endif()
-if(NOT USE_CUDA)
- # we still parse THCUNN even if cuda is disabled to make sure to
- # install it
- install(FILES src/THCUNN/generic/THCUNN.h DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THCUNN/generic")
-endif()
if(NOT USE_NNPACK)
set(AT_NNPACK_ENABLED 0)
return contig_if_nonempty;
}
-// Correspond to THCUNN_check_dim_size/THNN_check_dim_size
void check_dim_size(
const Tensor& tensor,
int64_t dim,
// on whether a subgeometry is contiguous.
TORCH_API bool geometry_is_contiguous(IntArrayRef sizes, IntArrayRef strides);
-// Correspond to THCUNN_check_dim_size/THNN_check_dim_size
TORCH_API void check_dim_size(
const Tensor& tensor,
int64_t dim,
#include <ATen/ExpandUtils.h>
#include <THC/THC.h>
#include <THC/THCTensor.hpp>
-#include <THCUNN/THCUNN.h>
#undef THNN_
#undef THCIndexTensor_
#include <ATen/DeviceGuard.h>
#include <ATen/ATen.h>
-// Contents of this file are copied from THCUNN/common.h for the ease of porting
-// THCUNN functions into ATen.
-
namespace at { namespace cuda { namespace detail {
// CUDA: grid stride looping
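For reference, grid-stride looping is the standard way a fixed-size grid covers an arbitrarily large N; the macro name and kernel below are illustrative assumptions, not the actual ATen definitions:

```cuda
// Hedged sketch of the grid-stride loop pattern: each thread starts at its
// global index and advances by the total number of launched threads, so one
// launch configuration can process any element count.
#define CUDA_KERNEL_LOOP_SKETCH(i, n)                              \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n);    \
       i += blockDim.x * gridDim.x)

__global__ void scale_kernel_sketch(float* data, float alpha, int n) {
  CUDA_KERNEL_LOOP_SKETCH(i, n) {
    data[i] *= alpha;  // each thread handles several strided elements
  }
}
```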
* TH = TorcH
* THC = TorcH Cuda
* THCS = TorcH Cuda Sparse (now defunct)
-* THCUNN = TorcH CUda Neural Network (see cunn)
* THNN = TorcH Neural Network (now defunct)
* THS = TorcH Sparse (now defunct)
+++ /dev/null
-set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS}
-PARENT_SCOPE)
-
-set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE}
- "${CMAKE_CURRENT_SOURCE_DIR}"
-PARENT_SCOPE)
-
-install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- DESTINATION ${ATEN_INSTALL_INCLUDE_SUBDIR}
- FILES_MATCHING PATTERN "*.h" PATTERN "*.cuh")
+++ /dev/null
-# THCUNN
-
-THCUNN is a library that gathers nn's C implementations of neural network modules. It's entirely free of Lua dependencies and can therefore be used in any application that has a C FFI. Please note that it only contains quite low-level functions; most users will want to use ATen, which provides a C++ wrapper around these functions.
-
-
-Looking to add an implementation? Consider writing an ATen native function
-instead! See [../ATen/native](../ATen/native).
-
-## Links
-
-* [API reference](doc/api_reference.md)
-* [Style guidelines](doc/style_guidelines.md)
-
-## API
-
-THCUNN is a purely functional library. It provides 2-3 functions for each module that perform the most important operations:
-
-* **updateOutput** - applies the module to an input
-* **updateGradInput** - accepts gradient w.r.t. output and previous module input, and computes a gradient w.r.t. that input
-* **accGradParameters** - *(optional, only modules with parameters)* accepts gradient w.r.t. output and previous module input, and computes gradient w.r.t. the parameters
-
-For information on argument types please check the [API reference](doc/api_reference.md).
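As a hedged sketch of this calling convention (the module name `Abs` and the exact parameter lists below are illustrative assumptions, not the real THCUNN declarations), a parameter-free module exposes roughly:

```cuda
// Hypothetical sketch only: the updateOutput / updateGradInput pair for a
// parameter-free "Abs"-style module, as described in the list above.
struct THCState;   // opaque library handle (forward declaration)
struct THCTensor;  // opaque CUDA tensor (forward declaration)

// Applies the module to `input`, writing (and resizing) `output`.
void THNN_CudaAbs_updateOutput(
    THCState* state,
    THCTensor* input,
    THCTensor* output);

// Given the gradient w.r.t. the output and the original input,
// computes the gradient w.r.t. that input.
void THNN_CudaAbs_updateGradInput(
    THCState* state,
    THCTensor* input,
    THCTensor* gradOutput,
    THCTensor* gradInput);
```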
-
-## Developer docs
-
-* [Style guidelines](doc/style_guidelines.md)
+++ /dev/null
-// Based on the simpleTemplates CUDA example
-
-#ifndef THCUNN_SHAREDMEM_H
-#define THCUNN_SHAREDMEM_H
-
-template <typename T>
-struct SharedMem {
- __device__ T *getPointer()
- {
- extern __device__ void error(void);
- error();
- return NULL;
- }
-};
-
-template <>
-struct SharedMem<half>
-{
- __device__ half *getPointer() {
- extern __shared__ half s_half[];
- return s_half;
- }
-};
-
-template <>
-struct SharedMem<float>
-{
- __device__ float *getPointer() {
- extern __shared__ float s_float[];
- return s_float;
- }
-};
-
-template <>
-struct SharedMem<double>
-{
- __device__ double *getPointer() {
- extern __shared__ double s_double[];
- return s_double;
- }
-};
-
-#endif
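A hedged usage sketch may help explain why the per-type specializations above exist: `extern __shared__` arrays need a distinct symbol per element type, so kernels go through `SharedMem` to obtain a typed pointer to dynamic shared memory. The kernel and launch below are illustrative only:

```cuda
// Hedged usage sketch for the SharedMem helper above (assumes the header's
// SharedMem<float> specialization is in scope).
__global__ void block_sum_sketch(const float* in, float* out, int n) {
  SharedMem<float> smem;
  float* buf = smem.getPointer();  // dynamic shared memory, sized at launch
  int tid = threadIdx.x;
  int i = blockIdx.x * blockDim.x + tid;
  buf[tid] = (i < n) ? in[i] : 0.0f;
  __syncthreads();
  // Tree reduction within the block (blockDim.x assumed to be a power of two).
  for (int stride = blockDim.x / 2; stride > 0; stride /= 2) {
    if (tid < stride) buf[tid] += buf[tid + stride];
    __syncthreads();
  }
  if (tid == 0) out[blockIdx.x] = buf[0];
}
// Launch sketch: block_sum_sketch<<<blocks, threads, threads * sizeof(float)>>>(in, out, n);
```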
+++ /dev/null
-#ifndef THC_HALF_AUTO_NUMERICS_INC
-#define THC_HALF_AUTO_NUMERICS_INC
-
-#include <TH/THHalf.h>
-#include <THC/THCNumerics.cuh>
-
-// WARNING: THCNumerics is being deprecated. Read the comments and function usage
-// in THCNumerics to learn about the deprecation
-//
-// Half numerics functions defined as free functions, so cunn code can be
-// written generically, i.e. without excessive calling of THCNumerics<THHalf> functions.
-
-// these functions should move to THCNumerics
-
-inline __host__ __device__ THHalf fmaxType(THHalf x, THHalf y) {
- return THCNumerics<THHalf>::ge(x, y) ? x : y;
-}
-
-inline __host__ __device__ float fmaxType(float x, THHalf y) {
- return fmaxf(x, ScalarConvert<THHalf, float>::to(y));
-}
-
-inline __host__ __device__ float fmaxType(float x, float y) {
- return fmaxf(x, y);
-}
-
-inline __host__ __device__ double fmaxType(double x, double y) {
- return fmax(x, y);
-}
-
-
-// arithmetic functions
-
-inline __host__ __device__ THHalf pow(THHalf a, THHalf b) {
- return THCNumerics<THHalf>::pow(a, b);
-}
-
-#endif
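As a hedged illustration of the comment above (generic cunn code without explicit `THCNumerics<THHalf>` calls), a templated device helper can rely on these overloads; the function below is a made-up example, not part of the header:

```cuda
// Hedged sketch: overload resolution picks the THHalf/float/double fmaxType
// defined above, so templated device code needs no per-type special casing.
template <typename T>
__device__ T clamp_min_sketch(T x, T lower_bound) {
  return fmaxType(x, lower_bound);
}
```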
+++ /dev/null
-#include <THC/THC.h>
-
-#define THCIndexTensor THCudaLongTensor
-#define THCIndexTensor_(NAME) THCudaLongTensor_ ## NAME
-typedef int64_t THCIndex_t;
-
-#define THNN_(NAME) TH_CONCAT_3(THNN_, CReal, NAME)
-
-#include <THCUNN/generic/THCUNN.h>
-#include <THC/THCGenerateFloatTypes.h>
-
-#include <THCUNN/generic/THCUNN.h>
-#include <THC/THCGenerateBFloat16Type.h>
+++ /dev/null
-#ifndef THCUNN_COMMON_H
-#define THCUNN_COMMON_H
-
-#define THCUNN_assertSameGPU(...) THAssertMsg(THCTensor_(checkGPU)(__VA_ARGS__), \
- "Some of weight/gradient/input tensors are located on different GPUs. Please move them to a single one.")
-
-// Use 1024 threads per block, which requires cuda sm_2x or above
-const int CUDA_NUM_THREADS = 1024;
-
-// CUDA: number of blocks for threads.
-inline int GET_BLOCKS(const int64_t N)
-{
- // Round up division for positive number
- auto block_num = N / CUDA_NUM_THREADS + (N % CUDA_NUM_THREADS == 0 ? 0 : 1);
-
- constexpr int64_t max_int = std::numeric_limits<int>::max();
- THAssertMsg(block_num <= max_int, "Can't schedule too many blocks on CUDA device");
-
- return static_cast<int>(block_num);
-}
-
-#define THCUNN_resizeAs_indices(STATE, I1, I2) \
- if (!I1->sizes().equals(I2->sizes())) \
- { \
- THCudaLongTensor_resizeAs(STATE, I1, I2); \
- }
-
-#define THCUNN_check_shape(STATE, I1, I2) \
- if (I1 != NULL && I2 != NULL && !THCTensor_(isSameSizeAs)(STATE, I1, I2)) \
- { \
- THCDescBuff s1 = THCTensor_(sizeDesc)(STATE, I1); \
- THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2); \
- THError(#I1 " and " #I2 " shapes do not match: " \
- #I1 " %s, " #I2 " %s", s1.str, s2.str); \
- }
-
-
-#define THCUNN_check_shape_indices(STATE, I1, I2) \
- if (!I1->sizes().equals(I2->sizes())) \
- { \
- THCDescBuff s1 = THCIndexTensor_(sizeDesc)(STATE, I1); \
- THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2); \
- THError(#I1 " and " #I2 " shapes do not match: " \
- #I1 " %s, " #I2 " %s", s1.str, s2.str); \
- }
-
-#define THCUNN_check_nElement(STATE, I1, I2) \
- if (I1 != NULL && I2 != NULL ) { \
- ptrdiff_t n1 = THCTensor_(nElement)(STATE, I1); \
- ptrdiff_t n2 = THCTensor_(nElement)(STATE, I2); \
- if (n1 != n2) \
- { \
- THCDescBuff s1 = THCTensor_(sizeDesc)(state, I1); \
- THCDescBuff s2 = THCTensor_(sizeDesc)(state, I2); \
- THError(#I1 " and " #I2 " have different number of elements: " \
- #I1 "%s has %ld elements, while " \
- #I2 "%s has %ld elements", s1.str, n1, s2.str, n2); \
- } \
- }
-
-#define THCUNN_check_dim_size(STATE, T, DIM, DIM_SIZE, SIZE) \
- if (THCTensor_(nDimensionLegacyNoScalars)(STATE, T) != DIM || \
- THCTensor_(sizeLegacyNoScalars)(STATE, T, DIM_SIZE) != SIZE) { \
- THCDescBuff s1 = THCTensor_(sizeDesc)(state, T); \
- THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \
- " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
- }
-
-#define THCUNN_check_dim_size_indices(STATE, T, DIM, DIM_SIZE, SIZE) \
- if (THCIndexTensor_(nDimensionLegacyNoScalars)(STATE, T) != DIM || \
- THCIndexTensor_(sizeLegacyNoScalars)(STATE, T, DIM_SIZE) != SIZE) { \
- THCDescBuff s1 = THCIndexTensor_(sizeDesc)(state, T); \
- THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \
- " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
- }
-
-#define THCUNN_argCheck(STATE, COND, ARG, T, FORMAT) \
- if (!(COND)) { \
- THCDescBuff s1 = THCTensor_(sizeDesc)(state, T); \
- THArgCheck(COND, ARG, FORMAT, s1.str); \
- }
-
-#endif
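For comparison, checks like `THCUNN_check_dim_size` map onto plain ATen-side functions built on `TORCH_CHECK` (see the `check_dim_size` declaration earlier in this diff). The following is only a rough sketch of such a check, not the actual ATen implementation:

```cuda
// Hedged sketch of an ATen-style dimension/size check, analogous to the
// THCUNN_check_dim_size macro above.
#include <ATen/ATen.h>

void check_dim_size_sketch(
    const at::Tensor& tensor, int64_t dim, int64_t dim_size, int64_t size) {
  TORCH_CHECK(
      tensor.dim() == dim && tensor.size(dim_size) == size,
      "Expected a tensor of dimension ", dim,
      " with size ", size, " at dimension ", dim_size,
      ", but got shape ", tensor.sizes());
}
```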
+++ /dev/null
-# API docs
-
-This document describes the conventions behind the THCUNN API.
-
-### The API
-
-All functions provided by THCUNN are declared in `aten/src/THCUNN/generic/THCUNN.h`.
-Look at this file for the complete list.
-
-### Note on function names
-
-Please remember that, because C doesn't support function overloading, functions taking different tensor types have different names. So, e.g., for an Abs module there are actually two updateOutput functions:
-
-* `void THNN_FloatAbs_updateOutput(...)`
-* `void THNN_DoubleAbs_updateOutput(...)`
-
-In these docs such a function will be referred to as `void THCUNN_Abs_updateOutput(...)`, and it's up to the developer to add a type prefix. `real` is an alias for that type.
-
-### Argument types
-
-Some arguments have additional tags placed in square brackets in their header declarations:
-
-* **[OUT]** - This is the output argument. It will be reshaped if needed.
-* **[OPTIONAL]** - This argument is optional and can be safely set to NULL.
-* **[BUFFER]** - A buffer. `updateGradInput` and `accGradParameters` should get the same buffers that were used in the `updateOutput` call.
-* **[MODIFIED]** - Some functions accept an `inplace` flag. If set to true, this argument might be modified (in addition to the output).
+++ /dev/null
-## API design guidelines
-
-Functions should return `void`.
-
-All functions should accept arguments in the following order. `...` represents any module-specific parameters or buffers, regardless of whether they are used for writing or reading. Arguments in `...` below should be ordered like this:
-```
-[weight], [bias], [any buffers], [additional arguments], [optional arguments]
-```
-
-### Modules
-```
-updateOutput: state, input, output, ...
-updateGradInput: state, input, gradOutput, gradInput, ...
-accGradParameters: state, input, gradOutput, [gradWeight], [gradBias], ...
-```
-
-e.g.
-```C
-void THNN_(ClassNLLCriterion_updateGradInput)(
- THCState *state,
- THCTensor *input,
- THCIndexTensor *target,
- THCTensor *gradOutput,
- THCTensor *gradInput,
- int64_t reduction,
- THCTensor *weights,
- THCTensor *total_weight,
- int64_t ignore_index)
-```
-
-### Criterions
-```
-updateOutput: state, input, target, output, ...
-updateGradInput: state, input, target, gradInput, ...
-```
-
-e.g.
-
-```C
-void THNN_(ClassNLLCriterion_updateOutput)(
- THCState *state,
- THCTensor *input,
- THCIndexTensor *target,
- THCTensor *output,
- int64_t reduction,
- THCTensor *weights,
- THCTensor *total_weight,
- int64_t ignore_index)
-```
-
-## Code style guide
-
-```C
-void THNN_(GatedLinear_updateOutput)(
- THCState *state,
- THCTensor *input,
- THCTensor *output,
- int dim)
-//<- 10 ->
-```
-
-All arguments should start on a new line after the function name, and they should be indented using 10 spaces.
-
-Use 2 spaces for block indentation.
+++ /dev/null
-#ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "THCUNN/generic/THCUNN.h"
-#else
-
-#include <ATen/core/Reduction.h>
-#include <ATen/Generator.h>
-
-TORCH_CUDA_CU_API void THNN_(MultiMarginCriterion_updateOutput)(
- THCState* state,
- THCTensor* input,
- THCIndexTensor* target,
- THCTensor* output,
- int64_t reduction,
- int p,
- THCTensor* weights, // [OPTIONAL]
- accreal margin);
-
-TORCH_CUDA_CU_API void THNN_(MultiMarginCriterion_updateGradInput)(
- THCState* state,
- THCTensor* input,
- THCIndexTensor* target,
- THCTensor* gradOutput,
- THCTensor* gradInput,
- int64_t reduction,
- int p,
- THCTensor* weights, // [OPTIONAL]
- accreal margin);
-
-#endif
'include/THC/*.cuh',
'include/THC/*.h*',
'include/THC/generic/*.h',
- 'include/THCUNN/*.cuh',
- 'include/THCUNN/generic/*.h',
'include/THH/*.cuh',
'include/THH/*.h*',
'include/THH/generic/*.h',
to import arbitrary Python files in a script, without having to add
them to the PYTHONPATH first.
-Legacy infrastructure (we should kill this):
-* [cwrap](cwrap) - Implementation of legacy code generation for THNN/THCUNN.
- This is used by nnwrap.
-
Build system pieces:
* [setup_helpers](setup_helpers) - Helper code for searching for
"aten/src/ATen/native/sparse/cuda/*",
"aten/src/ATen/native/quantized/cuda/*",
"aten/src/THC/*",
- "aten/src/THCUNN/*",
"aten/src/ATen/test/*",
# CMakeLists.txt isn't processed by default, but there are a few
# we do want to handle, so explicitly specify them
"aten/src/THC/CMakeLists.txt",
- "aten/src/THCUNN/CMakeLists.txt",
"torch/*",
"tools/autograd/templates/python_variable_methods.cpp",
]
or f.startswith("ATen/native/quantized/cuda")
or f.startswith("ATen/native/sparse/cuda")
or f.startswith("THC/")
- or f.startswith("THCUNN/")
or (f.startswith("THC") and not f.startswith("THCP"))
):
return templ.format(get_hip_file_path(m.group(1), is_pytorch_extension))