Unify device() return type in Stream, Event, and Tensor (#16150)
authorShen Li <shenli@fb.com>
Sun, 20 Jan 2019 06:58:54 +0000 (22:58 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Sun, 20 Jan 2019 07:01:31 +0000 (23:01 -0800)
Summary:
Addresses one future work item in #15937
Pull Request resolved: https://github.com/pytorch/pytorch/pull/16150

Differential Revision: D13732299

Pulled By: mrshenli

fbshipit-source-id: 4d0b35df573a3bf92dea6e2e7eb42fe8bac77b18

aten/src/ATen/cuda/CUDAEvent.h
test/test_cuda.py
test/test_multiprocessing.py
torch/csrc/cuda/Event.cpp
torch/csrc/cuda/Stream.cpp
torch/cuda/streams.py

index 1b14685..53eadd4 100644 (file)
@@ -72,8 +72,12 @@ struct AT_CUDA_API CUDAEvent {
     return left.event_ < right.event_;
   }
 
-  at::Device device() const {
-    return at::Device(at::kCUDA, device_index_);
+  optional<at::Device> device() const {
+    if (is_created_) {
+      return at::Device(at::kCUDA, device_index_);
+    } else {
+      return {};
+    }
   }
 
   bool isCreated() const { return is_created_; }
index 5873216..67d3ba4 100644 (file)
@@ -1439,6 +1439,37 @@ class TestCuda(TestCase):
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
     @skipIfRocm
+    def test_stream_event_device(self):
+        d0 = torch.device('cuda:0')
+        d1 = torch.device('cuda:1')
+        e0 = torch.cuda.Event()
+
+        self.assertEqual(None, e0.device)
+
+        with torch.cuda.device(d0):
+            s0 = torch.cuda.current_stream()
+            s0.record_event(e0)
+
+        with torch.cuda.device(d1):
+            s1 = torch.cuda.Stream()
+            e1 = s1.record_event()
+
+        self.assertEqual(s0.device, torch.device('cuda:0'))
+        self.assertEqual(e0.device, torch.device('cuda:0'))
+        self.assertEqual(s1.device, torch.device('cuda:1'))
+        self.assertEqual(e1.device, torch.device('cuda:1'))
+
+    @skipIfRocm
+    def test_stream_event_repr(self):
+        s = torch.cuda.current_stream()
+        self.assertTrue("torch.cuda.Stream" in s.__repr__())
+        e = torch.cuda.Event()
+        self.assertTrue("torch.cuda.Event" in e.__repr__())
+        s.record_event(e)
+        self.assertTrue("torch.cuda.Event" in e.__repr__())
+
+    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
+    @skipIfRocm
     def test_stream_context(self):
         s0 = torch.cuda.current_stream()
         s1 = torch.cuda.Stream(device=1)
@@ -1464,13 +1495,15 @@ class TestCuda(TestCase):
         self.assertEqual(0, torch.cuda.current_device())
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
+    @skipIfRocm
     def test_streams_multi_gpu(self):
         default_stream = torch.cuda.current_stream()
-        self.assertEqual(default_stream.device, 0)
+        self.assertEqual(default_stream.device, torch.device('cuda:0'))
         stream = torch.cuda.Stream(device=1)
-        self.assertEqual(stream.device, 1)
+        self.assertEqual(stream.device, torch.device('cuda:1'))
         with torch.cuda.device(1):
-            self.assertEqual(torch.cuda.current_stream().device, 1)
+            self.assertEqual(
+                torch.cuda.current_stream().device, torch.device('cuda:1'))
             self.assertNotEqual(torch.cuda.current_stream(), default_stream)
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
@@ -1550,12 +1583,12 @@ class TestCuda(TestCase):
         s0 = torch.cuda.Stream(device=0, priority=low)
 
         self.assertEqual(low, s0.priority)
-        self.assertEqual(0, s0.device)
+        self.assertEqual(torch.device('cuda:0'), s0.device)
 
         s1 = torch.cuda.Stream(device=1, priority=high)
 
         self.assertEqual(high, s1.priority)
-        self.assertEqual(1, s1.device)
+        self.assertEqual(torch.device('cuda:1'), s1.device)
 
     @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
     def test_tensor_device(self):
index fadeb43..46122b7 100644 (file)
@@ -457,8 +457,7 @@ class TestMultiprocessing(TestCase):
             e1.ipc_handle()
 
     def _test_event_handle_importer_consumer(handle, p2c, c2p):
-        e1 = torch.cuda.Event.from_ipc_handle(
-            torch.cuda.current_device(), handle)
+        e1 = torch.cuda.Event.from_ipc_handle(0, handle)
         c2p.put(0)  # notify parent child is ready
         p2c.get()  # wait for record in parent
         e1.synchronize()
index ce64c61..4be39c1 100644 (file)
@@ -1,8 +1,9 @@
 #include <torch/csrc/cuda/Event.h>
+#include <torch/csrc/cuda/Module.h>
 #include <torch/csrc/cuda/Stream.h>
-
+#include <torch/csrc/Device.h>
 #include <torch/csrc/THP.h>
-#include <torch/csrc/cuda/Module.h>
+#include <torch/csrc/utils/python_arg_parser.h>
 
 #include <c10/cuda/CUDAGuard.h>
 
@@ -43,27 +44,23 @@ static PyObject * THCPEvent_pynew(
 }
 
 static PyObject * THCPEvent_from_ipc_handle(
-    PyTypeObject *type, PyObject *args) {
+    PyTypeObject *type, PyObject *args, PyObject *kwargs) {
   HANDLE_TH_ERRORS
-  long long device_index = -1;
-  const char *handle_bytes = nullptr;
-  int handle_size = 0;
 
-  // cannot use bool 'p' and bytearray 'Y' as they are not available in Python 2
-  if (!PyArg_ParseTuple(
-      args, "Ls#", &device_index, &handle_bytes, &handle_size)) {
-    return nullptr;
-  }
+  static torch::PythonArgParser parser({
+    "from_ipc_handle(Device device, std::string ipc_handle)",
+  });
+  torch::ParsedArgs<2> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
 
-  AT_CHECK(handle_size == sizeof(cudaIpcEventHandle_t),
-    "cudaIpcEventHandle_t expects byte-like object of size ",
-    sizeof(cudaIpcEventHandle_t), ", but got ", handle_size);
-  AT_CHECK(device_index >= 0, "Reconstructing event from handle requires "
-    "a non-negtive device index, but got ", device_index)
+  at::Device device = r.device(0);
+  std::string handle_string = r.string(1);
 
-  // no need to release the handle byte array as it is automatically managed
-  // by the corresponding THCPEvent python object.
-  // see https://docs.python.org/3/c-api/arg.html#strings-and-buffers
+  AT_CHECK(handle_string.size() == sizeof(cudaIpcEventHandle_t),
+    "cudaIpcEventHandle_t expects byte-like object of size ",
+    sizeof(cudaIpcEventHandle_t), ", but got ", handle_string.size());
+  AT_CHECK(device.type() == at::kCUDA, "Event can only be created on "
+    "CUDA devices, but got device type ", device.type())
 
   THPObjectPtr ptr(type->tp_alloc(type, 0));
   if (!ptr) {
@@ -72,8 +69,8 @@ static PyObject * THCPEvent_from_ipc_handle(
   THCPEvent* self = (THCPEvent *)ptr.get();
 
   cudaIpcEventHandle_t handle;
-  std::memcpy(&handle, handle_bytes, handle_size);
-  new (&self->cuda_event) at::cuda::CUDAEvent(device_index, &handle);
+  std::memcpy(&handle, handle_string.c_str(), handle_string.size());
+  new (&self->cuda_event) at::cuda::CUDAEvent(device.index(), &handle);
 
   return (PyObject *)ptr.release();
   END_HANDLE_TH_ERRORS
@@ -92,7 +89,11 @@ static PyObject * THCPEvent_get_cuda_event(THCPEvent *self) {
 
 static PyObject * THCPEvent_get_device(THCPEvent *self) {
   HANDLE_TH_ERRORS
-  return THPUtils_packInt64(self->cuda_event.device_index());
+  at::optional<at::Device> device = self->cuda_event.device();
+  if (!device) {
+    Py_RETURN_NONE;
+  }
+  return THPDevice_New(device.value());
   END_HANDLE_TH_ERRORS
 }
 
@@ -145,7 +146,7 @@ static struct PyGetSetDef THCPEvent_properties[] = {
 
 static PyMethodDef THCPEvent_methods[] = {
   {(char*)"from_ipc_handle", (PyCFunction)THCPEvent_from_ipc_handle,
-    METH_CLASS | METH_VARARGS, nullptr},
+    METH_CLASS | METH_VARARGS | METH_KEYWORDS, nullptr},
   {(char*)"record", (PyCFunction)THCPEvent_record, METH_O, nullptr},
   {(char*)"wait", (PyCFunction)THCPEvent_wait, METH_O, nullptr},
   {(char*)"query", (PyCFunction)THCPEvent_query, METH_NOARGS, nullptr},
index 3a0d4a4..8f704b6 100644 (file)
@@ -1,7 +1,7 @@
 #include <torch/csrc/cuda/Stream.h>
-
-#include <torch/csrc/THP.h>
 #include <torch/csrc/cuda/Module.h>
+#include <torch/csrc/Device.h>
+#include <torch/csrc/THP.h>
 
 #include <c10/cuda/CUDAGuard.h>
 
@@ -52,7 +52,7 @@ static void THCPStream_dealloc(THCPStream *self) {
 
 static PyObject * THCPStream_get_device(THCPStream *self) {
   HANDLE_TH_ERRORS
-  return THPUtils_packInt64(self->cuda_stream.device_index());
+  return THPDevice_New(self->cuda_stream.device());
   END_HANDLE_TH_ERRORS
 }
 
index 1736e3e..1a69a4f 100644 (file)
@@ -192,4 +192,7 @@ class Event(torch._C._CudaEventBase):
         return ctypes.c_void_p(self.cuda_event)
 
     def __repr__(self):
-        return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
+        if self.cuda_event:
+            return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
+        else:
+            return '<torch.cuda.Event uninitialized>'