Remove CUDA 9.2 references, conditionals, and workarounds (#65070)
author	Jane Xu <janeyx@fb.com>
Fri, 17 Sep 2021 18:45:11 +0000 (11:45 -0700)
committer	Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Fri, 17 Sep 2021 19:28:23 +0000 (12:28 -0700)
Summary:
Title says it all: CUDA 9.2 is no longer a supported toolkit, so this removes the remaining CUDA 9.2 preprocessor conditionals, compiler workarounds, and stale comments across ATen's cuSOLVER gating, c10::optional, caffe2's OP_SINGLE_ARG macro, the NVFuser tests, torch/__init__.py's DLL loading, the profiler's nvprof parsing, and torch::nn::Cloneable.
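
As an illustrative sketch only (the concrete hunks follow below), most of the deleted sites shared the same shape: a preprocessor branch keyed on CUDA_VERSION or CUDART_VERSION that preserved a workaround path for toolkits at or below 9.2, which now collapses to the modern path unconditionally.

    // Before (pattern only, not any specific file): keep a fallback for CUDA <= 9.2.
    #if (!defined(CUDA_VERSION) || CUDA_VERSION > 9200)
    // ... modern code path ...
    #else
    // ... hand-written workaround kept only for the old toolkit ...
    #endif

    // After: the workaround branch and its guard are gone; only the modern path remains.
    // ... modern code path ...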

Pull Request resolved: https://github.com/pytorch/pytorch/pull/65070

Reviewed By: malfet

Differential Revision: D30966464

Pulled By: janeyx99

fbshipit-source-id: e454906fd5d7d321d390939ba5d237e1d9b150f8

aten/src/ATen/native/cuda/BatchLinearAlgebraLib.h
c10/util/Optional.cpp
c10/util/Optional.h
caffe2/core/operator.h
test/cpp/jit/test_gpu.cpp
torch/__init__.py
torch/autograd/profiler.py
torch/csrc/api/include/torch/nn/cloneable.h

index 72d2f65..2c48a0d 100644 (file)
@@ -7,8 +7,7 @@
 #include <ATen/native/LinearAlgebraUtils.h>
 #include <ATen/native/cuda/MiscUtils.h>
 
-#if defined(CUDART_VERSION) && defined(CUSOLVER_VERSION) && CUSOLVER_VERSION >= 10200
-// some cusolver functions don't work well on cuda 9.2 or cuda 10.1.105, cusolver is used on cuda >= 10.1.243
+#if defined(CUDART_VERSION) && defined(CUSOLVER_VERSION)
 #define USE_CUSOLVER
 #endif
 
index dd78eee..b98dc56 100644 (file)
@@ -3,9 +3,6 @@
 
 #include <type_traits>
 
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
 static_assert(
     C10_IS_TRIVIALLY_COPYABLE(c10::optional<int>),
     "c10::optional<int> should be trivially copyable");
@@ -18,4 +15,3 @@ static_assert(
 static_assert(
     sizeof(c10::optional<c10::IntArrayRef>) == sizeof(c10::IntArrayRef),
     "c10::optional<IntArrayRef> should be size-optimized");
-#endif
index 7044c79..c2f87eb 100644 (file)
@@ -499,9 +499,6 @@ template <typename T>
 struct is_arrayref<c10::ArrayRef<T>> : std::true_type {};
 } // namespace detail_
 
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
 template <class T>
 using OptionalBase = std::conditional_t<
     detail_::is_arrayref<T>::value,
@@ -524,23 +521,9 @@ using OptionalBase = std::conditional_t<
                                                              // trivial
                                                              // destructor
             optional_base<std::remove_const_t<T>>>>>;
-#else
-template <class T>
-using OptionalBase = std::conditional_t<
-    detail_::is_arrayref<T>::value,
-    arrayref_optional_base<T>,
-    std::conditional_t<
-        std::is_trivially_destructible<T>::value, // if possible
-        constexpr_optional_base<std::remove_const_t<T>>, // use base with
-                                                         // trivial destructor
-        optional_base<std::remove_const_t<T>>>>;
-#endif
 
 template <class T>
 class optional : private OptionalBase<T> {
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
   template <class U> // re-declaration for nvcc on Windows.
   using OptionalBase = std::conditional_t<
       detail_::is_arrayref<U>::value,
@@ -565,17 +548,6 @@ class optional : private OptionalBase<T> {
                                                                // trivial
                                                                // destructor
               optional_base<std::remove_const_t<U>>>>>;
-#else
-  template <class U>
-  using OptionalBase = std::conditional_t<
-      detail_::is_arrayref<U>::value,
-      arrayref_optional_base<U>,
-      std::conditional_t<
-          std::is_trivially_destructible<U>::value, // if possible
-          constexpr_optional_base<std::remove_const_t<U>>, // use base with
-                                                           // trivial destructor
-          optional_base<std::remove_const_t<U>>>>;
-#endif
 
   static_assert(
       !std::is_same<typename std::decay<T>::type, nullopt_t>::value,
@@ -634,20 +606,7 @@ class optional : private OptionalBase<T> {
   constexpr optional(nullopt_t) noexcept : OptionalBase<T>(){};
 
   optional(const optional& rhs) = default;
-
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
   optional(optional&& rhs) = default;
-#else
-  optional(optional&& rhs) noexcept(
-      std::is_nothrow_move_constructible<T>::value) {
-    if (rhs.initialized()) {
-      ::new (static_cast<void*>(dataptr())) T(std::move(*rhs));
-      OptionalBase<T>::setInitialized(true);
-    }
-  }
-#endif
 
   // see https://github.com/akrzemi1/Optional/issues/16
   // and https://en.cppreference.com/w/cpp/utility/optional/optional,
index 15d1ead..b670845 100644 (file)
@@ -731,14 +731,8 @@ inline vector<int16_t> OperatorBase::GetVectorFromIValueList<int16_t>(
 
 // OP_SINGLE_ARG provides a shorter initialization choice for initialization of
 // member variables for the class constructors.
-// This is a workaround for CUDA9.2 and GCC7
-#if defined(CUDART_VERSION) && CUDART_VERSION >= 9020 && __GNUC__ >= 7
-#define OP_SINGLE_ARG(type, name, variable, default) \
-  variable(this->template GetSingleArgument<type>(name, (default)))
-#else
 #define OP_SINGLE_ARG(type, name, variable, default) \
   variable(OperatorBase::GetSingleArgument<type>(name, (default)))
-#endif
 
 // INPUT_TAGS and OUTPUT_TAGS are optional features to name the indices of the
 // operator's inputs and outputs, in order to avoid confusion. For example, for
index 1a0ee7b..4674082 100644 (file)
@@ -2606,44 +2606,40 @@ TEST(NVFuserTest, FusionUnaryOps_CUDA) {
   using OpTuple =
       std::tuple<at::Tensor (*)(const at::Tensor&), UnaryOpType, std::string>;
 
-  // [Note: explicit tuple type for uniform initialization list]
-  // Tuple type must be explicitly specified for each uniform initialization
-  // list within the vector to make this code compatible with some old env
-  // which we still need to support. eg. gcc 5.4 + cuda 9.2.
   std::vector<OpTuple> ops{
-      OpTuple{at::abs, UnaryOpType::Abs, "abs"},
-      OpTuple{at::acos, UnaryOpType::Acos, "acos"},
-      OpTuple{at::asin, UnaryOpType::Asin, "asin"},
-      OpTuple{at::atan, UnaryOpType::Atan, "atan"},
+      {at::abs, UnaryOpType::Abs, "abs"},
+      {at::acos, UnaryOpType::Acos, "acos"},
+      {at::asin, UnaryOpType::Asin, "asin"},
+      {at::atan, UnaryOpType::Atan, "atan"},
       // There does not appear to be an appropriate ATen function for atanh
-      // OpTuple{at::atanh,      UnaryOpType::Atanh,      "atanh"      },
-      OpTuple{at::ceil, UnaryOpType::Ceil, "ceil"},
-      OpTuple{at::cos, UnaryOpType::Cos, "cos"},
-      OpTuple{at::cosh, UnaryOpType::Cosh, "cosh"},
-      OpTuple{at::erf, UnaryOpType::Erf, "erf"},
-      OpTuple{at::erfc, UnaryOpType::Erfc, "erfc"},
-      OpTuple{at::exp, UnaryOpType::Exp, "exp"},
-      OpTuple{at::expm1, UnaryOpType::Expm1, "expm1"},
-      OpTuple{at::floor, UnaryOpType::Floor, "floor"},
-      OpTuple{at::frac, UnaryOpType::Frac, "frac"},
-      OpTuple{at::gelu, UnaryOpType::Gelu, "gelu"},
-      OpTuple{at::lgamma, UnaryOpType::Lgamma, "lgamma"},
-      OpTuple{at::log, UnaryOpType::Log, "log"},
-      OpTuple{at::log10, UnaryOpType::Log10, "log10"},
-      OpTuple{at::log1p, UnaryOpType::Log1p, "log1p"},
-      OpTuple{at::log2, UnaryOpType::Log2, "log2"},
-      OpTuple{at::neg, UnaryOpType::Neg, "neg"},
-      OpTuple{at::reciprocal, UnaryOpType::Reciprocal, "reciprocal"},
-      OpTuple{at::relu, UnaryOpType::Relu, "relu"},
-      OpTuple{at::round, UnaryOpType::Round, "round"},
-      OpTuple{at::rsqrt, UnaryOpType::Rsqrt, "rsqrt"},
-      OpTuple{at::sigmoid, UnaryOpType::Sigmoid, "sigmoid"},
-      OpTuple{at::sin, UnaryOpType::Sin, "sin"},
-      OpTuple{at::sinh, UnaryOpType::Sinh, "sinh"},
-      OpTuple{at::sqrt, UnaryOpType::Sqrt, "sqrt"},
-      OpTuple{at::tan, UnaryOpType::Tan, "tan"},
-      OpTuple{at::tanh, UnaryOpType::Tanh, "tanh"},
-      OpTuple{at::trunc, UnaryOpType::Trunc, "trunc"}};
+      // {at::atanh,      UnaryOpType::Atanh,      "atanh"      },
+      {at::ceil, UnaryOpType::Ceil, "ceil"},
+      {at::cos, UnaryOpType::Cos, "cos"},
+      {at::cosh, UnaryOpType::Cosh, "cosh"},
+      {at::erf, UnaryOpType::Erf, "erf"},
+      {at::erfc, UnaryOpType::Erfc, "erfc"},
+      {at::exp, UnaryOpType::Exp, "exp"},
+      {at::expm1, UnaryOpType::Expm1, "expm1"},
+      {at::floor, UnaryOpType::Floor, "floor"},
+      {at::frac, UnaryOpType::Frac, "frac"},
+      {at::gelu, UnaryOpType::Gelu, "gelu"},
+      {at::lgamma, UnaryOpType::Lgamma, "lgamma"},
+      {at::log, UnaryOpType::Log, "log"},
+      {at::log10, UnaryOpType::Log10, "log10"},
+      {at::log1p, UnaryOpType::Log1p, "log1p"},
+      {at::log2, UnaryOpType::Log2, "log2"},
+      {at::neg, UnaryOpType::Neg, "neg"},
+      {at::reciprocal, UnaryOpType::Reciprocal, "reciprocal"},
+      {at::relu, UnaryOpType::Relu, "relu"},
+      {at::round, UnaryOpType::Round, "round"},
+      {at::rsqrt, UnaryOpType::Rsqrt, "rsqrt"},
+      {at::sigmoid, UnaryOpType::Sigmoid, "sigmoid"},
+      {at::sin, UnaryOpType::Sin, "sin"},
+      {at::sinh, UnaryOpType::Sinh, "sinh"},
+      {at::sqrt, UnaryOpType::Sqrt, "sqrt"},
+      {at::tan, UnaryOpType::Tan, "tan"},
+      {at::tanh, UnaryOpType::Tanh, "tanh"},
+      {at::trunc, UnaryOpType::Trunc, "trunc"}};
 
   std::for_each(ops.begin(), ops.end(), [](OpTuple& op) {
     test_op(
@@ -2680,14 +2676,13 @@ TEST(NVFuserTest, FusionBinaryOps_CUDA) {
   using AtenFuncSig = at::Tensor (*)(const at::Tensor&, const at::Tensor&);
   using OpTuple = std::tuple<AtenFuncSig, BinaryOpType, std::string>;
 
-  // see [Note: explicit tuple type for uniform initialization list]
   std::vector<OpTuple> logic_ops{
-      OpTuple{at::eq, BinaryOpType::Eq, "eq"},
-      OpTuple{at::ge, BinaryOpType::GE, "ge"},
-      OpTuple{at::gt, BinaryOpType::GT, "gt"},
-      OpTuple{at::le, BinaryOpType::LE, "le"},
-      OpTuple{at::lt, BinaryOpType::LT, "lt"},
-      OpTuple{at::ne, BinaryOpType::NE, "ne"}};
+      {at::eq, BinaryOpType::Eq, "eq"},
+      {at::ge, BinaryOpType::GE, "ge"},
+      {at::gt, BinaryOpType::GT, "gt"},
+      {at::le, BinaryOpType::LE, "le"},
+      {at::lt, BinaryOpType::LT, "lt"},
+      {at::ne, BinaryOpType::NE, "ne"}};
 
   std::for_each(logic_ops.begin(), logic_ops.end(), [](OpTuple& op) {
     test_op(
@@ -2709,18 +2704,17 @@ TEST(NVFuserTest, FusionBinaryOps_CUDA) {
             std::make_pair(ValType::TensorView, DataType::Float)));
   });
 
-  // see [Note: explicit tuple type for uniform initialization list]
   std::vector<OpTuple> math_ops{
-      OpTuple{at::atan2, BinaryOpType::Atan2, "atan2"},
-      OpTuple{at::div, BinaryOpType::Div, "div"},
-      OpTuple{at::fmod, BinaryOpType::Fmod, "fmod"},
-      OpTuple{at::max, BinaryOpType::Max, "max"},
-      OpTuple{at::min, BinaryOpType::Min, "min"},
-      OpTuple{at::mul, BinaryOpType::Mul, "mul"},
-      OpTuple{at::pow, BinaryOpType::Pow, "pow"},
+      {at::atan2, BinaryOpType::Atan2, "atan2"},
+      {at::div, BinaryOpType::Div, "div"},
+      {at::fmod, BinaryOpType::Fmod, "fmod"},
+      {at::max, BinaryOpType::Max, "max"},
+      {at::min, BinaryOpType::Min, "min"},
+      {at::mul, BinaryOpType::Mul, "mul"},
+      {at::pow, BinaryOpType::Pow, "pow"},
       // NOTE: Remainder does not match the Aten impl exactly
       // despite using an identical function.
-      OpTuple{at::remainder, BinaryOpType::Remainder, "remainder"},
+      {at::remainder, BinaryOpType::Remainder, "remainder"},
   };
 
   std::for_each(math_ops.begin(), math_ops.end(), [](OpTuple& op) {
index 5740b7a..18c9b08 100644 (file)
@@ -106,8 +106,7 @@ if sys.platform == 'win32':
     try:
         ctypes.CDLL('vcruntime140.dll')
         ctypes.CDLL('msvcp140.dll')
-        if cuda_version not in ('9.2', '10.0'):
-            ctypes.CDLL('vcruntime140_1.dll')
+        ctypes.CDLL('vcruntime140_1.dll')
     except OSError:
         print('''Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure.
                  It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe''')
index c38ad99..c121b11 100644 (file)
@@ -654,8 +654,8 @@ def parse_nvprof_trace(path):
     unique = EnforceUnique()
     for row in conn.execute(kernel_query):
         unique.see(row['marker_id'], row['runtime_id'])
-        # 211 is cudaKernelLaunch for cuda >= 9.2; 13 is for older cuda versions
-        assert (row['cbid'] == 211) or (row['cbid'] == 13)
+        # 211 is cudaKernelLaunch for cuda >= 9.2
+        assert (row['cbid'] == 211)
         evt = functions_map[row['marker_id']]
         evt.append_kernel(row['kernel_name'],
                           0,
index 463784a..cc735d6 100644 (file)
@@ -42,12 +42,8 @@ class Cloneable : public virtual Module {
     copy->buffers_.clear();
     copy->children_.clear();
     copy->reset();
-    // [[this pointer note]]
-    // Don't remove 'this' pointer, nvcc needs it to be explicitly given in some envs.
-    // eg. ubuntu 16.04 + gcc 5.x + cuda 9.2
-    //     ubuntu 16.04 + gcc 7.x + cuda 9.2
     TORCH_CHECK(
-        copy->parameters_.size() == this->parameters_.size(),
+        copy->parameters_.size() == parameters_.size(),
         "The cloned module does not have the same number of "
         "parameters as the original module after calling reset(). "
         "Are you sure you called register_parameter() inside reset() "
@@ -58,9 +54,8 @@ class Cloneable : public virtual Module {
           tensor.to(*device) : autograd::Variable(tensor).clone();
       copy->parameters_[parameter.key()].set_data(data);
     }
-    // Don't remove 'this' pointer. See [[this pointer note]]
     TORCH_CHECK(
-        copy->buffers_.size() == this->buffers_.size(),
+        copy->buffers_.size() == buffers_.size(),
         "The cloned module does not have the same number of "
         "buffers as the original module after calling reset(). "
         "Are you sure you called register_buffer() inside reset() "
@@ -71,15 +66,13 @@ class Cloneable : public virtual Module {
           tensor.to(*device) : autograd::Variable(tensor).clone();
       copy->buffers_[buffer.key()].set_data(data);
     }
-    // Don't remove 'this' pointer. See [[this pointer note]]
     TORCH_CHECK(
-        copy->children_.size() == this->children_.size(),
+        copy->children_.size() == children_.size(),
         "The cloned module does not have the same number of "
         "child modules as the original module after calling reset(). "
         "Are you sure you called register_module() inside reset() "
         "and not the constructor?");
-    // Don't remove 'this' pointer. See [[this pointer note]]
-    for (const auto& child : this->children_) {
+    for (const auto& child : children_) {
       copy->children_[child.key()]->clone_(*child.value(), device);
     }
     return copy;