From: Shen Li
Date: Sun, 20 Jan 2019 06:58:54 +0000 (-0800)
Subject: Unify device() return type in Stream, Event, and Tensor (#16150)
X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~1748
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=898329c3f93401f731eac41bbd6e9dffa0cc7a85;p=platform%2Fupstream%2Fpytorch.git

Unify device() return type in Stream, Event, and Tensor (#16150)

Summary:
Addresses one future work item in #15937

Pull Request resolved: https://github.com/pytorch/pytorch/pull/16150

Differential Revision: D13732299

Pulled By: mrshenli

fbshipit-source-id: 4d0b35df573a3bf92dea6e2e7eb42fe8bac77b18
---

diff --git a/aten/src/ATen/cuda/CUDAEvent.h b/aten/src/ATen/cuda/CUDAEvent.h
index 1b14685..53eadd4 100644
--- a/aten/src/ATen/cuda/CUDAEvent.h
+++ b/aten/src/ATen/cuda/CUDAEvent.h
@@ -72,8 +72,12 @@ struct AT_CUDA_API CUDAEvent {
     return left.event_ < right.event_;
   }
 
-  at::Device device() const {
-    return at::Device(at::kCUDA, device_index_);
+  optional<at::Device> device() const {
+    if (is_created_) {
+      return at::Device(at::kCUDA, device_index_);
+    } else {
+      return {};
+    }
   }
 
   bool isCreated() const { return is_created_; }
diff --git a/test/test_cuda.py b/test/test_cuda.py
index 5873216..67d3ba4 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -1439,6 +1439,37 @@ class TestCuda(TestCase):
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
     @skipIfRocm
+    def test_stream_event_device(self):
+        d0 = torch.device('cuda:0')
+        d1 = torch.device('cuda:1')
+        e0 = torch.cuda.Event()
+
+        self.assertEqual(None, e0.device)
+
+        with torch.cuda.device(d0):
+            s0 = torch.cuda.current_stream()
+            s0.record_event(e0)
+
+        with torch.cuda.device(d1):
+            s1 = torch.cuda.Stream()
+            e1 = s1.record_event()
+
+        self.assertEqual(s0.device, torch.device('cuda:0'))
+        self.assertEqual(e0.device, torch.device('cuda:0'))
+        self.assertEqual(s1.device, torch.device('cuda:1'))
+        self.assertEqual(e1.device, torch.device('cuda:1'))
+
+    @skipIfRocm
+    def test_stream_event_repr(self):
+        s = torch.cuda.current_stream()
+        self.assertTrue("torch.cuda.Stream" in s.__repr__())
+        e = torch.cuda.Event()
+        self.assertTrue("torch.cuda.Event" in e.__repr__())
+        s.record_event(e)
+        self.assertTrue("torch.cuda.Event" in e.__repr__())
+
+    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
+    @skipIfRocm
     def test_stream_context(self):
         s0 = torch.cuda.current_stream()
         s1 = torch.cuda.Stream(device=1)
@@ -1464,13 +1495,15 @@ class TestCuda(TestCase):
         self.assertEqual(0, torch.cuda.current_device())
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
+    @skipIfRocm
     def test_streams_multi_gpu(self):
         default_stream = torch.cuda.current_stream()
-        self.assertEqual(default_stream.device, 0)
+        self.assertEqual(default_stream.device, torch.device('cuda:0'))
         stream = torch.cuda.Stream(device=1)
-        self.assertEqual(stream.device, 1)
+        self.assertEqual(stream.device, torch.device('cuda:1'))
         with torch.cuda.device(1):
-            self.assertEqual(torch.cuda.current_stream().device, 1)
+            self.assertEqual(
+                torch.cuda.current_stream().device, torch.device('cuda:1'))
         self.assertNotEqual(torch.cuda.current_stream(), default_stream)
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
@@ -1550,12 +1583,12 @@ class TestCuda(TestCase):
         s0 = torch.cuda.Stream(device=0, priority=low)
 
         self.assertEqual(low, s0.priority)
-        self.assertEqual(0, s0.device)
+        self.assertEqual(torch.device('cuda:0'), s0.device)
 
         s1 = torch.cuda.Stream(device=1, priority=high)
 
         self.assertEqual(high, s1.priority)
-        self.assertEqual(1, s1.device)
+        self.assertEqual(torch.device('cuda:1'), s1.device)
 
     @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
     def test_tensor_device(self):
diff --git a/test/test_multiprocessing.py b/test/test_multiprocessing.py
index fadeb43..46122b7 100644
--- a/test/test_multiprocessing.py
+++ b/test/test_multiprocessing.py
@@ -457,8 +457,7 @@ class TestMultiprocessing(TestCase):
             e1.ipc_handle()
 
     def _test_event_handle_importer_consumer(handle, p2c, c2p):
-        e1 = torch.cuda.Event.from_ipc_handle(
-            torch.cuda.current_device(), handle)
+        e1 = torch.cuda.Event.from_ipc_handle(0, handle)
         c2p.put(0)  # notify parent child is ready
         p2c.get()  # wait for record in parent
         e1.synchronize()
diff --git a/torch/csrc/cuda/Event.cpp b/torch/csrc/cuda/Event.cpp
index ce64c61..4be39c1 100644
--- a/torch/csrc/cuda/Event.cpp
+++ b/torch/csrc/cuda/Event.cpp
@@ -1,8 +1,9 @@
 #include
+#include
 #include
-
+#include
 #include
-#include
+#include
 #include
@@ -43,27 +44,23 @@ static PyObject * THCPEvent_pynew(
 }
 
 static PyObject * THCPEvent_from_ipc_handle(
-    PyTypeObject *type, PyObject *args) {
+    PyTypeObject *type, PyObject *args, PyObject *kwargs) {
   HANDLE_TH_ERRORS
-  long long device_index = -1;
-  const char *handle_bytes = nullptr;
-  int handle_size = 0;
-  // cannot use bool 'p' and bytearray 'Y' as they are not available in Python 2
-  if (!PyArg_ParseTuple(
-      args, "Ls#", &device_index, &handle_bytes, &handle_size)) {
-    return nullptr;
-  }
+  static torch::PythonArgParser parser({
+    "from_ipc_handle(Device device, std::string ipc_handle)",
+  });
+  torch::ParsedArgs<2> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
 
-  AT_CHECK(handle_size == sizeof(cudaIpcEventHandle_t),
-    "cudaIpcEventHandle_t expects byte-like object of size ",
-    sizeof(cudaIpcEventHandle_t), ", but got ", handle_size);
-  AT_CHECK(device_index >= 0, "Reconstructing event from handle requires "
-    "a non-negtive device index, but got ", device_index)
+  at::Device device = r.device(0);
+  std::string handle_string = r.string(1);
 
-  // no need to release the handle byte array as it is automatically managed
-  // by the corresponding THCPEvent python object.
-  // see https://docs.python.org/3/c-api/arg.html#strings-and-buffers
+  AT_CHECK(handle_string.size() == sizeof(cudaIpcEventHandle_t),
+    "cudaIpcEventHandle_t expects byte-like object of size ",
+    sizeof(cudaIpcEventHandle_t), ", but got ", handle_string.size());
+  AT_CHECK(device.type() == at::kCUDA, "Event can only be created on "
+    "CUDA devices, but got device type ", device.type())
 
   THPObjectPtr ptr(type->tp_alloc(type, 0));
   if (!ptr) {
@@ -72,8 +69,8 @@ static PyObject * THCPEvent_from_ipc_handle(
   THCPEvent* self = (THCPEvent *)ptr.get();
 
   cudaIpcEventHandle_t handle;
-  std::memcpy(&handle, handle_bytes, handle_size);
-  new (&self->cuda_event) at::cuda::CUDAEvent(device_index, &handle);
+  std::memcpy(&handle, handle_string.c_str(), handle_string.size());
+  new (&self->cuda_event) at::cuda::CUDAEvent(device.index(), &handle);
 
   return (PyObject *)ptr.release();
   END_HANDLE_TH_ERRORS
@@ -92,7 +89,11 @@ static PyObject * THCPEvent_get_cuda_event(THCPEvent *self) {
 
 static PyObject * THCPEvent_get_device(THCPEvent *self) {
   HANDLE_TH_ERRORS
-  return THPUtils_packInt64(self->cuda_event.device_index());
+  at::optional<at::Device> device = self->cuda_event.device();
+  if (!device) {
+    Py_RETURN_NONE;
+  }
+  return THPDevice_New(device.value());
   END_HANDLE_TH_ERRORS
 }
@@ -145,7 +146,7 @@ static struct PyGetSetDef THCPEvent_properties[] = {
 
 static PyMethodDef THCPEvent_methods[] = {
   {(char*)"from_ipc_handle", (PyCFunction)THCPEvent_from_ipc_handle,
-    METH_CLASS | METH_VARARGS, nullptr},
+    METH_CLASS | METH_VARARGS | METH_KEYWORDS, nullptr},
   {(char*)"record", (PyCFunction)THCPEvent_record, METH_O, nullptr},
   {(char*)"wait", (PyCFunction)THCPEvent_wait, METH_O, nullptr},
   {(char*)"query", (PyCFunction)THCPEvent_query, METH_NOARGS, nullptr},
diff --git a/torch/csrc/cuda/Stream.cpp b/torch/csrc/cuda/Stream.cpp
index 3a0d4a4..8f704b6 100644
--- a/torch/csrc/cuda/Stream.cpp
+++ b/torch/csrc/cuda/Stream.cpp
@@ -1,7 +1,7 @@
 #include
-
-#include
 #include
+#include
+#include
 #include
@@ -52,7 +52,7 @@ static void THCPStream_dealloc(THCPStream *self) {
 
 static PyObject * THCPStream_get_device(THCPStream *self) {
   HANDLE_TH_ERRORS
-  return THPUtils_packInt64(self->cuda_stream.device_index());
+  return THPDevice_New(self->cuda_stream.device());
   END_HANDLE_TH_ERRORS
 }
diff --git a/torch/cuda/streams.py b/torch/cuda/streams.py
index 1736e3e..1a69a4f 100644
--- a/torch/cuda/streams.py
+++ b/torch/cuda/streams.py
@@ -192,4 +192,7 @@ class Event(torch._C._CudaEventBase):
         return ctypes.c_void_p(self.cuda_event)
 
     def __repr__(self):
-        return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
+        if self.cuda_event:
+            return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
+        else:
+            return '<torch.cuda.Event uninitialized>'
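Illustrative usage, not part of the patch: a minimal sketch of the behavior this change unifies, assuming a build with the patch applied and at least one CUDA GPU. The expectations mirror test_stream_event_device and test_stream_event_repr above.

import torch

# Stream.device now returns a torch.device (it was a bare integer index).
s = torch.cuda.current_stream()
print(s.device)    # device(type='cuda', index=0)
print(repr(s))     # repr mentions "torch.cuda.Stream"

# The underlying CUDA event is created lazily, so an unrecorded Event
# has no device yet; its device attribute is None.
e = torch.cuda.Event()
print(e.device)    # None
print(repr(e))     # repr still mentions "torch.cuda.Event"

# Once recorded, the event is bound to the recording stream's device.
s.record_event(e)
print(e.device)    # device(type='cuda', index=0)

# Stream, Event, and Tensor now all expose comparable torch.device values.
t = torch.empty(1, device='cuda')
assert s.device == e.device == t.device

Returning None for an unrecorded event, rather than a guessed index, falls out of the lazy creation in CUDAEvent: device() only carries a value once is_created_ is set.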