Unify device() return type in Stream, Event, and Tensor (#16150)
authorShen Li <shenli@fb.com>
Sun, 20 Jan 2019 06:58:54 +0000 (22:58 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Sun, 20 Jan 2019 07:01:31 +0000 (23:01 -0800)
Summary:
Addresses one future work item in #15937
Pull Request resolved: https://github.com/pytorch/pytorch/pull/16150

Differential Revision: D13732299

Pulled By: mrshenli

fbshipit-source-id: 4d0b35df573a3bf92dea6e2e7eb42fe8bac77b18

aten/src/ATen/cuda/CUDAEvent.h
test/test_cuda.py
test/test_multiprocessing.py
torch/csrc/cuda/Event.cpp
torch/csrc/cuda/Stream.cpp
torch/cuda/streams.py

index 1b14685..53eadd4 100644 (file)
@@ -72,8 +72,12 @@ struct AT_CUDA_API CUDAEvent {
     return left.event_ < right.event_;
   }
 
-  at::Device device() const {
-    return at::Device(at::kCUDA, device_index_);
+  optional<at::Device> device() const {
+    if (is_created_) {
+      return at::Device(at::kCUDA, device_index_);
+    } else {
+      return {};
+    }
   }
 
   bool isCreated() const { return is_created_; }
index 5873216..67d3ba4 100644 (file)
@@ -1439,6 +1439,37 @@ class TestCuda(TestCase):
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
     @skipIfRocm
+    def test_stream_event_device(self):
+        d0 = torch.device('cuda:0')
+        d1 = torch.device('cuda:1')
+        e0 = torch.cuda.Event()
+
+        self.assertEqual(None, e0.device)
+
+        with torch.cuda.device(d0):
+            s0 = torch.cuda.current_stream()
+            s0.record_event(e0)
+
+        with torch.cuda.device(d1):
+            s1 = torch.cuda.Stream()
+            e1 = s1.record_event()
+
+        self.assertEqual(s0.device, torch.device('cuda:0'))
+        self.assertEqual(e0.device, torch.device('cuda:0'))
+        self.assertEqual(s1.device, torch.device('cuda:1'))
+        self.assertEqual(e1.device, torch.device('cuda:1'))
+
+    @skipIfRocm
+    def test_stream_event_repr(self):
+        s = torch.cuda.current_stream()
+        self.assertTrue("torch.cuda.Stream" in s.__repr__())
+        e = torch.cuda.Event()
+        self.assertTrue("torch.cuda.Event" in e.__repr__())
+        s.record_event(e)
+        self.assertTrue("torch.cuda.Event" in e.__repr__())
+
+    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
+    @skipIfRocm
     def test_stream_context(self):
         s0 = torch.cuda.current_stream()
         s1 = torch.cuda.Stream(device=1)
@@ -1464,13 +1495,15 @@ class TestCuda(TestCase):
         self.assertEqual(0, torch.cuda.current_device())
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
+    @skipIfRocm
     def test_streams_multi_gpu(self):
         default_stream = torch.cuda.current_stream()
-        self.assertEqual(default_stream.device, 0)
+        self.assertEqual(default_stream.device, torch.device('cuda:0'))
         stream = torch.cuda.Stream(device=1)
-        self.assertEqual(stream.device, 1)
+        self.assertEqual(stream.device, torch.device('cuda:1'))
         with torch.cuda.device(1):
-            self.assertEqual(torch.cuda.current_stream().device, 1)
+            self.assertEqual(
+                torch.cuda.current_stream().device, torch.device('cuda:1'))
             self.assertNotEqual(torch.cuda.current_stream(), default_stream)
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
@@ -1550,12 +1583,12 @@ class TestCuda(TestCase):
         s0 = torch.cuda.Stream(device=0, priority=low)
 
         self.assertEqual(low, s0.priority)
-        self.assertEqual(0, s0.device)
+        self.assertEqual(torch.device('cuda:0'), s0.device)
 
         s1 = torch.cuda.Stream(device=1, priority=high)
 
         self.assertEqual(high, s1.priority)
-        self.assertEqual(1, s1.device)
+        self.assertEqual(torch.device('cuda:1'), s1.device)
 
     @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
     def test_tensor_device(self):
index fadeb43..46122b7 100644 (file)
@@ -457,8 +457,7 @@ class TestMultiprocessing(TestCase):
             e1.ipc_handle()
 
     def _test_event_handle_importer_consumer(handle, p2c, c2p):
-        e1 = torch.cuda.Event.from_ipc_handle(
-            torch.cuda.current_device(), handle)
+        e1 = torch.cuda.Event.from_ipc_handle(0, handle)
         c2p.put(0)  # notify parent child is ready
         p2c.get()  # wait for record in parent
         e1.synchronize()
index ce64c61..4be39c1 100644 (file)
@@ -1,8 +1,9 @@
 #include <torch/csrc/cuda/Event.h>
+#include <torch/csrc/cuda/Module.h>
 #include <torch/csrc/cuda/Stream.h>
-
+#include <torch/csrc/Device.h>
 #include <torch/csrc/THP.h>
-#include <torch/csrc/cuda/Module.h>
+#include <torch/csrc/utils/python_arg_parser.h>
 
 #include <c10/cuda/CUDAGuard.h>
 
@@ -43,27 +44,23 @@ static PyObject * THCPEvent_pynew(
 }
 
 static PyObject * THCPEvent_from_ipc_handle(
-    PyTypeObject *type, PyObject *args) {
+    PyTypeObject *type, PyObject *args, PyObject *kwargs) {
   HANDLE_TH_ERRORS
-  long long device_index = -1;
-  const char *handle_bytes = nullptr;
-  int handle_size = 0;
 
-  // cannot use bool 'p' and bytearray 'Y' as they are not available in Python 2
-  if (!PyArg_ParseTuple(
-      args, "Ls#", &device_index, &handle_bytes, &handle_size)) {
-    return nullptr;
-  }
+  static torch::PythonArgParser parser({
+    "from_ipc_handle(Device device, std::string ipc_handle)",
+  });
+  torch::ParsedArgs<2> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
 
-  AT_CHECK(handle_size == sizeof(cudaIpcEventHandle_t),
-    "cudaIpcEventHandle_t expects byte-like object of size ",
-    sizeof(cudaIpcEventHandle_t), ", but got ", handle_size);
-  AT_CHECK(device_index >= 0, "Reconstructing event from handle requires "
-    "a non-negtive device index, but got ", device_index)
+  at::Device device = r.device(0);
+  std::string handle_string = r.string(1);
 
-  // no need to release the handle byte array as it is automatically managed
-  // by the corresponding THCPEvent python object.
-  // see https://docs.python.org/3/c-api/arg.html#strings-and-buffers
+  AT_CHECK(handle_string.size() == sizeof(cudaIpcEventHandle_t),
+    "cudaIpcEventHandle_t expects byte-like object of size ",
+    sizeof(cudaIpcEventHandle_t), ", but got ", handle_string.size());
+  AT_CHECK(device.type() == at::kCUDA, "Event can only be created on "
+    "CUDA devices, but got device type ", device.type())
 
   THPObjectPtr ptr(type->tp_alloc(type, 0));
   if (!ptr) {
@@ -72,8 +69,8 @@ static PyObject * THCPEvent_from_ipc_handle(
   THCPEvent* self = (THCPEvent *)ptr.get();
 
   cudaIpcEventHandle_t handle;
-  std::memcpy(&handle, handle_bytes, handle_size);
-  new (&self->cuda_event) at::cuda::CUDAEvent(device_index, &handle);
+  std::memcpy(&handle, handle_string.c_str(), handle_string.size());
+  new (&self->cuda_event) at::cuda::CUDAEvent(device.index(), &handle);
 
   return (PyObject *)ptr.release();
   END_HANDLE_TH_ERRORS
@@ -92,7 +89,11 @@ static PyObject * THCPEvent_get_cuda_event(THCPEvent *self) {
 
 static PyObject * THCPEvent_get_device(THCPEvent *self) {
   HANDLE_TH_ERRORS
-  return THPUtils_packInt64(self->cuda_event.device_index());
+  at::optional<at::Device> device = self->cuda_event.device();
+  if (!device) {
+    Py_RETURN_NONE;
+  }
+  return THPDevice_New(device.value());
   END_HANDLE_TH_ERRORS
 }
 
@@ -145,7 +146,7 @@ static struct PyGetSetDef THCPEvent_properties[] = {
 
 static PyMethodDef THCPEvent_methods[] = {
   {(char*)"from_ipc_handle", (PyCFunction)THCPEvent_from_ipc_handle,
-    METH_CLASS | METH_VARARGS, nullptr},
+    METH_CLASS | METH_VARARGS | METH_KEYWORDS, nullptr},
   {(char*)"record", (PyCFunction)THCPEvent_record, METH_O, nullptr},
   {(char*)"wait", (PyCFunction)THCPEvent_wait, METH_O, nullptr},
   {(char*)"query", (PyCFunction)THCPEvent_query, METH_NOARGS, nullptr},
index 3a0d4a4..8f704b6 100644 (file)
@@ -1,7 +1,7 @@
 #include <torch/csrc/cuda/Stream.h>
-
-#include <torch/csrc/THP.h>
 #include <torch/csrc/cuda/Module.h>
+#include <torch/csrc/Device.h>
+#include <torch/csrc/THP.h>
 
 #include <c10/cuda/CUDAGuard.h>
 
@@ -52,7 +52,7 @@ static void THCPStream_dealloc(THCPStream *self) {
 
 static PyObject * THCPStream_get_device(THCPStream *self) {
   HANDLE_TH_ERRORS
-  return THPUtils_packInt64(self->cuda_stream.device_index());
+  return THPDevice_New(self->cuda_stream.device());
   END_HANDLE_TH_ERRORS
 }
 
index 1736e3e..1a69a4f 100644 (file)
@@ -192,4 +192,7 @@ class Event(torch._C._CudaEventBase):
         return ctypes.c_void_p(self.cuda_event)
 
     def __repr__(self):
-        return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
+        if self.cuda_event:
+            return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
+        else:
+            return '<torch.cuda.Event uninitialized>'