From 1ee66a5278f8c323210e051465d0c0cab2c9ccba Mon Sep 17 00:00:00 2001
From: Jane Xu
Date: Fri, 17 Sep 2021 11:45:11 -0700
Subject: [PATCH] Remove CUDA 9.2 references conditionals and workarounds (#65070)

Summary: Title says it all: now that CUDA 9.2 is no longer supported, this removes the
CUDA 9.2-specific preprocessor conditionals, workarounds, and comments that referenced it.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/65070

Reviewed By: malfet

Differential Revision: D30966464

Pulled By: janeyx99

fbshipit-source-id: e454906fd5d7d321d390939ba5d237e1d9b150f8
---
 aten/src/ATen/native/cuda/BatchLinearAlgebraLib.h |  3 +-
 c10/util/Optional.cpp                             |  4 -
 c10/util/Optional.h                               | 41 ----------
 caffe2/core/operator.h                            |  6 --
 test/cpp/jit/test_gpu.cpp                         | 98 +++++++++++------------
 torch/__init__.py                                 |  3 +-
 torch/autograd/profiler.py                        |  4 +-
 torch/csrc/api/include/torch/nn/cloneable.h       | 15 +---
 8 files changed, 54 insertions(+), 120 deletions(-)

diff --git a/aten/src/ATen/native/cuda/BatchLinearAlgebraLib.h b/aten/src/ATen/native/cuda/BatchLinearAlgebraLib.h
index 72d2f65..2c48a0d 100644
--- a/aten/src/ATen/native/cuda/BatchLinearAlgebraLib.h
+++ b/aten/src/ATen/native/cuda/BatchLinearAlgebraLib.h
@@ -7,8 +7,7 @@
 #include
 #include
 
-#if defined(CUDART_VERSION) && defined(CUSOLVER_VERSION) && CUSOLVER_VERSION >= 10200
-// some cusolver functions don't work well on cuda 9.2 or cuda 10.1.105, cusolver is used on cuda >= 10.1.243
+#if defined(CUDART_VERSION) && defined(CUSOLVER_VERSION)
 #define USE_CUSOLVER
 #endif
 
diff --git a/c10/util/Optional.cpp b/c10/util/Optional.cpp
index dd78eee..b98dc56 100644
--- a/c10/util/Optional.cpp
+++ b/c10/util/Optional.cpp
@@ -3,9 +3,6 @@
 #include
 
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
 static_assert(
     C10_IS_TRIVIALLY_COPYABLE(c10::optional),
     "c10::optional should be trivially copyable");
 
@@ -18,4 +15,3 @@ static_assert(
 static_assert(
     sizeof(c10::optional) == sizeof(c10::IntArrayRef),
     "c10::optional should be size-optimized");
-#endif
diff --git a/c10/util/Optional.h b/c10/util/Optional.h
index 7044c79..c2f87eb 100644
--- a/c10/util/Optional.h
+++ b/c10/util/Optional.h
@@ -499,9 +499,6 @@ template
 struct is_arrayref> : std::true_type {};
 } // namespace detail_
 
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
 template
 using OptionalBase = std::conditional_t<
     detail_::is_arrayref::value,
@@ -524,23 +521,9 @@ using OptionalBase = std::conditional_t<
                                   // trivial
                                   // destructor
         optional_base>>>>;
-#else
-template
-using OptionalBase = std::conditional_t<
-    detail_::is_arrayref::value,
-    arrayref_optional_base,
-    std::conditional_t<
-        std::is_trivially_destructible::value, // if possible
-        constexpr_optional_base>, // use base with
-                                  // trivial destructor
-        optional_base>>>;
-#endif
 
 template
 class optional : private OptionalBase {
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
   template // re-declaration for nvcc on Windows.
   using OptionalBase = std::conditional_t<
       detail_::is_arrayref::value,
@@ -565,17 +548,6 @@ class optional : private OptionalBase {
                                     // trivial
                                     // destructor
           optional_base>>>>;
-#else
-  template
-  using OptionalBase = std::conditional_t<
-      detail_::is_arrayref::value,
-      arrayref_optional_base,
-      std::conditional_t<
-          std::is_trivially_destructible::value, // if possible
-          constexpr_optional_base>, // use base with
-                                    // trivial destructor
-          optional_base>>>;
-#endif
 
   static_assert(
       !std::is_same::type, nullopt_t>::value,
@@ -634,20 +606,7 @@ class optional : private OptionalBase {
   constexpr optional(nullopt_t) noexcept : OptionalBase(){};
 
   optional(const optional& rhs) = default;
-
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
   optional(optional&& rhs) = default;
-#else
-  optional(optional&& rhs) noexcept(
-      std::is_nothrow_move_constructible::value) {
-    if (rhs.initialized()) {
-      ::new (static_cast(dataptr())) T(std::move(*rhs));
-      OptionalBase::setInitialized(true);
-    }
-  }
-#endif
 
   // see https://github.com/akrzemi1/Optional/issues/16
   // and https://en.cppreference.com/w/cpp/utility/optional/optional,
diff --git a/caffe2/core/operator.h b/caffe2/core/operator.h
index 15d1ead..b670845 100644
--- a/caffe2/core/operator.h
+++ b/caffe2/core/operator.h
@@ -731,14 +731,8 @@ inline vector OperatorBase::GetVectorFromIValueList(
 
 // OP_SINGLE_ARG provides a shorter initialization choice for initialization of
 // member variables for the class constructors.
-// This is a workaround for CUDA9.2 and GCC7
-#if defined(CUDART_VERSION) && CUDART_VERSION >= 9020 && __GNUC__ >= 7
-#define OP_SINGLE_ARG(type, name, variable, default) \
-  variable(this->template GetSingleArgument(name, (default)))
-#else
 #define OP_SINGLE_ARG(type, name, variable, default) \
   variable(OperatorBase::GetSingleArgument(name, (default)))
-#endif
 
 // INPUT_TAGS and OUTPUT_TAGS are optional features to name the indices of the
 // operator's inputs and outputs, in order to avoid confusion. For example, for
diff --git a/test/cpp/jit/test_gpu.cpp b/test/cpp/jit/test_gpu.cpp
index 1a0ee7b..4674082 100644
--- a/test/cpp/jit/test_gpu.cpp
+++ b/test/cpp/jit/test_gpu.cpp
@@ -2606,44 +2606,40 @@ TEST(NVFuserTest, FusionUnaryOps_CUDA) {
 
   using OpTuple = std::tuple;
 
-  // [Note: explicit tuple type for uniform initialization list]
-  // Tuple type must be explicitly specified for each uniform initialization
-  // list within the vector to make this code compatible with some old env
-  // which we still need to support. eg. gcc 5.4 + cuda 9.2.
   std::vector ops{
-      OpTuple{at::abs, UnaryOpType::Abs, "abs"},
-      OpTuple{at::acos, UnaryOpType::Acos, "acos"},
-      OpTuple{at::asin, UnaryOpType::Asin, "asin"},
-      OpTuple{at::atan, UnaryOpType::Atan, "atan"},
+      {at::abs, UnaryOpType::Abs, "abs"},
+      {at::acos, UnaryOpType::Acos, "acos"},
+      {at::asin, UnaryOpType::Asin, "asin"},
+      {at::atan, UnaryOpType::Atan, "atan"},
       // There does not appear to be an appropriate ATen function for atanh
-      // OpTuple{at::atanh, UnaryOpType::Atanh, "atanh" },
-      OpTuple{at::ceil, UnaryOpType::Ceil, "ceil"},
-      OpTuple{at::cos, UnaryOpType::Cos, "cos"},
-      OpTuple{at::cosh, UnaryOpType::Cosh, "cosh"},
-      OpTuple{at::erf, UnaryOpType::Erf, "erf"},
-      OpTuple{at::erfc, UnaryOpType::Erfc, "erfc"},
-      OpTuple{at::exp, UnaryOpType::Exp, "exp"},
-      OpTuple{at::expm1, UnaryOpType::Expm1, "expm1"},
-      OpTuple{at::floor, UnaryOpType::Floor, "floor"},
-      OpTuple{at::frac, UnaryOpType::Frac, "frac"},
-      OpTuple{at::gelu, UnaryOpType::Gelu, "gelu"},
-      OpTuple{at::lgamma, UnaryOpType::Lgamma, "lgamma"},
-      OpTuple{at::log, UnaryOpType::Log, "log"},
-      OpTuple{at::log10, UnaryOpType::Log10, "log10"},
-      OpTuple{at::log1p, UnaryOpType::Log1p, "log1p"},
-      OpTuple{at::log2, UnaryOpType::Log2, "log2"},
-      OpTuple{at::neg, UnaryOpType::Neg, "neg"},
-      OpTuple{at::reciprocal, UnaryOpType::Reciprocal, "reciprocal"},
-      OpTuple{at::relu, UnaryOpType::Relu, "relu"},
-      OpTuple{at::round, UnaryOpType::Round, "round"},
-      OpTuple{at::rsqrt, UnaryOpType::Rsqrt, "rsqrt"},
-      OpTuple{at::sigmoid, UnaryOpType::Sigmoid, "sigmoid"},
-      OpTuple{at::sin, UnaryOpType::Sin, "sin"},
-      OpTuple{at::sinh, UnaryOpType::Sinh, "sinh"},
-      OpTuple{at::sqrt, UnaryOpType::Sqrt, "sqrt"},
-      OpTuple{at::tan, UnaryOpType::Tan, "tan"},
-      OpTuple{at::tanh, UnaryOpType::Tanh, "tanh"},
-      OpTuple{at::trunc, UnaryOpType::Trunc, "trunc"}};
+      // {at::atanh, UnaryOpType::Atanh, "atanh" },
+      {at::ceil, UnaryOpType::Ceil, "ceil"},
+      {at::cos, UnaryOpType::Cos, "cos"},
+      {at::cosh, UnaryOpType::Cosh, "cosh"},
+      {at::erf, UnaryOpType::Erf, "erf"},
+      {at::erfc, UnaryOpType::Erfc, "erfc"},
+      {at::exp, UnaryOpType::Exp, "exp"},
+      {at::expm1, UnaryOpType::Expm1, "expm1"},
+      {at::floor, UnaryOpType::Floor, "floor"},
+      {at::frac, UnaryOpType::Frac, "frac"},
+      {at::gelu, UnaryOpType::Gelu, "gelu"},
+      {at::lgamma, UnaryOpType::Lgamma, "lgamma"},
+      {at::log, UnaryOpType::Log, "log"},
+      {at::log10, UnaryOpType::Log10, "log10"},
+      {at::log1p, UnaryOpType::Log1p, "log1p"},
+      {at::log2, UnaryOpType::Log2, "log2"},
+      {at::neg, UnaryOpType::Neg, "neg"},
+      {at::reciprocal, UnaryOpType::Reciprocal, "reciprocal"},
+      {at::relu, UnaryOpType::Relu, "relu"},
+      {at::round, UnaryOpType::Round, "round"},
+      {at::rsqrt, UnaryOpType::Rsqrt, "rsqrt"},
+      {at::sigmoid, UnaryOpType::Sigmoid, "sigmoid"},
+      {at::sin, UnaryOpType::Sin, "sin"},
+      {at::sinh, UnaryOpType::Sinh, "sinh"},
+      {at::sqrt, UnaryOpType::Sqrt, "sqrt"},
+      {at::tan, UnaryOpType::Tan, "tan"},
+      {at::tanh, UnaryOpType::Tanh, "tanh"},
+      {at::trunc, UnaryOpType::Trunc, "trunc"}};
 
   std::for_each(ops.begin(), ops.end(), [](OpTuple& op) {
     test_op(
@@ -2680,14 +2676,13 @@ TEST(NVFuserTest, FusionBinaryOps_CUDA) {
   using AtenFuncSig = at::Tensor (*)(const at::Tensor&, const at::Tensor&);
   using OpTuple = std::tuple;
 
-  // see [Note: explicit tuple type for uniform initialization list]
   std::vector logic_ops{
-      OpTuple{at::eq, BinaryOpType::Eq, "eq"},
-      OpTuple{at::ge, BinaryOpType::GE, "ge"},
-      OpTuple{at::gt, BinaryOpType::GT, "gt"},
-      OpTuple{at::le, BinaryOpType::LE, "le"},
-      OpTuple{at::lt, BinaryOpType::LT, "lt"},
-      OpTuple{at::ne, BinaryOpType::NE, "ne"}};
+      {at::eq, BinaryOpType::Eq, "eq"},
+      {at::ge, BinaryOpType::GE, "ge"},
+      {at::gt, BinaryOpType::GT, "gt"},
+      {at::le, BinaryOpType::LE, "le"},
+      {at::lt, BinaryOpType::LT, "lt"},
+      {at::ne, BinaryOpType::NE, "ne"}};
 
   std::for_each(logic_ops.begin(), logic_ops.end(), [](OpTuple& op) {
     test_op(
@@ -2709,18 +2704,17 @@ TEST(NVFuserTest, FusionBinaryOps_CUDA) {
             std::make_pair(ValType::TensorView, DataType::Float)));
   });
 
-  // see [Note: explicit tuple type for uniform initialization list]
   std::vector math_ops{
-      OpTuple{at::atan2, BinaryOpType::Atan2, "atan2"},
-      OpTuple{at::div, BinaryOpType::Div, "div"},
-      OpTuple{at::fmod, BinaryOpType::Fmod, "fmod"},
-      OpTuple{at::max, BinaryOpType::Max, "max"},
-      OpTuple{at::min, BinaryOpType::Min, "min"},
-      OpTuple{at::mul, BinaryOpType::Mul, "mul"},
-      OpTuple{at::pow, BinaryOpType::Pow, "pow"},
+      {at::atan2, BinaryOpType::Atan2, "atan2"},
+      {at::div, BinaryOpType::Div, "div"},
+      {at::fmod, BinaryOpType::Fmod, "fmod"},
+      {at::max, BinaryOpType::Max, "max"},
+      {at::min, BinaryOpType::Min, "min"},
+      {at::mul, BinaryOpType::Mul, "mul"},
+      {at::pow, BinaryOpType::Pow, "pow"},
       // NOTE: Remainder does not match the Aten impl exactly
       // despite using an identical function.
-      OpTuple{at::remainder, BinaryOpType::Remainder, "remainder"},
+      {at::remainder, BinaryOpType::Remainder, "remainder"},
   };
 
   std::for_each(math_ops.begin(), math_ops.end(), [](OpTuple& op) {
diff --git a/torch/__init__.py b/torch/__init__.py
index 5740b7a..18c9b08 100644
--- a/torch/__init__.py
+++ b/torch/__init__.py
@@ -106,8 +106,7 @@ if sys.platform == 'win32':
     try:
         ctypes.CDLL('vcruntime140.dll')
         ctypes.CDLL('msvcp140.dll')
-        if cuda_version not in ('9.2', '10.0'):
-            ctypes.CDLL('vcruntime140_1.dll')
+        ctypes.CDLL('vcruntime140_1.dll')
     except OSError:
         print('''Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure.
                  It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe''')
diff --git a/torch/autograd/profiler.py b/torch/autograd/profiler.py
index c38ad99..c121b11 100644
--- a/torch/autograd/profiler.py
+++ b/torch/autograd/profiler.py
@@ -654,8 +654,8 @@ def parse_nvprof_trace(path):
     unique = EnforceUnique()
     for row in conn.execute(kernel_query):
         unique.see(row['marker_id'], row['runtime_id'])
-        # 211 is cudaKernelLaunch for cuda >= 9.2; 13 is for older cuda versions
-        assert (row['cbid'] == 211) or (row['cbid'] == 13)
+        # 211 is cudaKernelLaunch for cuda >= 9.2
+        assert (row['cbid'] == 211)
         evt = functions_map[row['marker_id']]
         evt.append_kernel(row['kernel_name'],
                           0,
diff --git a/torch/csrc/api/include/torch/nn/cloneable.h b/torch/csrc/api/include/torch/nn/cloneable.h
index 463784a..cc735d6 100644
--- a/torch/csrc/api/include/torch/nn/cloneable.h
+++ b/torch/csrc/api/include/torch/nn/cloneable.h
@@ -42,12 +42,8 @@ class Cloneable : public virtual Module {
     copy->buffers_.clear();
     copy->children_.clear();
     copy->reset();
-    // [[this pointer note]]
-    // Don't remove 'this' pointer, nvcc needs it to be explicitly given in some envs.
-    // eg. ubuntu 16.04 + gcc 5.x + cuda 9.2
-    //     ubuntu 16.04 + gcc 7.x + cuda 9.2
     TORCH_CHECK(
-        copy->parameters_.size() == this->parameters_.size(),
+        copy->parameters_.size() == parameters_.size(),
         "The cloned module does not have the same number of "
         "parameters as the original module after calling reset(). "
" "Are you sure you called register_parameter() inside reset() " @@ -58,9 +54,8 @@ class Cloneable : public virtual Module { tensor.to(*device) : autograd::Variable(tensor).clone(); copy->parameters_[parameter.key()].set_data(data); } - // Don't remove 'this' pointer. See [[this pointer note]] TORCH_CHECK( - copy->buffers_.size() == this->buffers_.size(), + copy->buffers_.size() == buffers_.size(), "The cloned module does not have the same number of " "buffers as the original module after calling reset(). " "Are you sure you called register_buffer() inside reset() " @@ -71,15 +66,13 @@ class Cloneable : public virtual Module { tensor.to(*device) : autograd::Variable(tensor).clone(); copy->buffers_[buffer.key()].set_data(data); } - // Don't remove 'this' pointer. See [[this pointer note]] TORCH_CHECK( - copy->children_.size() == this->children_.size(), + copy->children_.size() == children_.size(), "The cloned module does not have the same number of " "child modules as the original module after calling reset(). " "Are you sure you called register_module() inside reset() " "and not the constructor?"); - // Don't remove 'this' pointer. See [[this pointer note]] - for (const auto& child : this->children_) { + for (const auto& child : children_) { copy->children_[child.key()]->clone_(*child.value(), device); } return copy; -- 2.7.4