return left.event_ < right.event_;
}
- at::Device device() const {
- return at::Device(at::kCUDA, device_index_);
+ // The device is only meaningful once the underlying cudaEvent_t has
+ // been (lazily) created; return nullopt for an uninitialized event.
+ optional<at::Device> device() const {
+ if (is_created_) {
+ return at::Device(at::kCUDA, device_index_);
+ } else {
+ return {};
+ }
}
bool isCreated() const { return is_created_; }
@unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
@skipIfRocm
+ def test_stream_event_device(self):
+ # An event that has never been recorded reports device == None
+ # (its cudaEvent_t is created lazily on first record).
+ d0 = torch.device('cuda:0')
+ d1 = torch.device('cuda:1')
+ e0 = torch.cuda.Event()
+
+ self.assertEqual(None, e0.device)
+
+ # Recording binds the event to the recording stream's device.
+ with torch.cuda.device(d0):
+ s0 = torch.cuda.current_stream()
+ s0.record_event(e0)
+
+ with torch.cuda.device(d1):
+ s1 = torch.cuda.Stream()
+ e1 = s1.record_event()
+
+ # Streams and recorded events expose torch.device objects, not ints.
+ self.assertEqual(s0.device, torch.device('cuda:0'))
+ self.assertEqual(e0.device, torch.device('cuda:0'))
+ self.assertEqual(s1.device, torch.device('cuda:1'))
+ self.assertEqual(e1.device, torch.device('cuda:1'))
+
+ @skipIfRocm
+ def test_stream_event_repr(self):
+ # repr must identify the type both before and after the event's
+ # underlying cudaEvent_t is created by record_event.
+ s = torch.cuda.current_stream()
+ self.assertTrue("torch.cuda.Stream" in s.__repr__())
+ e = torch.cuda.Event()
+ self.assertTrue("torch.cuda.Event" in e.__repr__())
+ s.record_event(e)
+ self.assertTrue("torch.cuda.Event" in e.__repr__())
+
+ @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
+ @skipIfRocm
def test_stream_context(self):
s0 = torch.cuda.current_stream()
s1 = torch.cuda.Stream(device=1)
self.assertEqual(0, torch.cuda.current_device())
@unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
+ @skipIfRocm
def test_streams_multi_gpu(self):
default_stream = torch.cuda.current_stream()
- self.assertEqual(default_stream.device, 0)
+ self.assertEqual(default_stream.device, torch.device('cuda:0'))
stream = torch.cuda.Stream(device=1)
- self.assertEqual(stream.device, 1)
+ self.assertEqual(stream.device, torch.device('cuda:1'))
with torch.cuda.device(1):
- self.assertEqual(torch.cuda.current_stream().device, 1)
+ self.assertEqual(
+ torch.cuda.current_stream().device, torch.device('cuda:1'))
self.assertNotEqual(torch.cuda.current_stream(), default_stream)
@unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
s0 = torch.cuda.Stream(device=0, priority=low)
self.assertEqual(low, s0.priority)
- self.assertEqual(0, s0.device)
+ self.assertEqual(torch.device('cuda:0'), s0.device)
s1 = torch.cuda.Stream(device=1, priority=high)
self.assertEqual(high, s1.priority)
- self.assertEqual(1, s1.device)
+ self.assertEqual(torch.device('cuda:1'), s1.device)
@unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
def test_tensor_device(self):
e1.ipc_handle()
def _test_event_handle_importer_consumer(handle, p2c, c2p):
- e1 = torch.cuda.Event.from_ipc_handle(
- torch.cuda.current_device(), handle)
+ e1 = torch.cuda.Event.from_ipc_handle(0, handle)
c2p.put(0) # notify parent child is ready
p2c.get() # wait for record in parent
e1.synchronize()
#include <torch/csrc/cuda/Event.h>
+#include <torch/csrc/cuda/Module.h>
#include <torch/csrc/cuda/Stream.h>
-
+#include <torch/csrc/Device.h>
#include <torch/csrc/THP.h>
-#include <torch/csrc/cuda/Module.h>
+#include <torch/csrc/utils/python_arg_parser.h>
#include <c10/cuda/CUDAGuard.h>
}
static PyObject * THCPEvent_from_ipc_handle(
- PyTypeObject *type, PyObject *args) {
+ PyTypeObject *type, PyObject *args, PyObject *kwargs) {
HANDLE_TH_ERRORS
- long long device_index = -1;
- const char *handle_bytes = nullptr;
- int handle_size = 0;
- // cannot use bool 'p' and bytearray 'Y' as they are not available in Python 2
- if (!PyArg_ParseTuple(
- args, "Ls#", &device_index, &handle_bytes, &handle_size)) {
- return nullptr;
- }
+ static torch::PythonArgParser parser({
+ "from_ipc_handle(Device device, std::string ipc_handle)",
+ });
+ torch::ParsedArgs<2> parsed_args;
+ auto r = parser.parse(args, kwargs, parsed_args);
- AT_CHECK(handle_size == sizeof(cudaIpcEventHandle_t),
- "cudaIpcEventHandle_t expects byte-like object of size ",
- sizeof(cudaIpcEventHandle_t), ", but got ", handle_size);
- AT_CHECK(device_index >= 0, "Reconstructing event from handle requires "
- "a non-negtive device index, but got ", device_index)
+ at::Device device = r.device(0);
+ std::string handle_string = r.string(1);
- // no need to release the handle byte array as it is automatically managed
- // by the corresponding THCPEvent python object.
- // see https://docs.python.org/3/c-api/arg.html#strings-and-buffers
+ AT_CHECK(handle_string.size() == sizeof(cudaIpcEventHandle_t),
+ "cudaIpcEventHandle_t expects byte-like object of size ",
+ sizeof(cudaIpcEventHandle_t), ", but got ", handle_string.size());
+ AT_CHECK(device.type() == at::kCUDA, "Event can only be created on "
+ "CUDA devices, but got device type ", device.type())
THPObjectPtr ptr(type->tp_alloc(type, 0));
if (!ptr) {
THCPEvent* self = (THCPEvent *)ptr.get();
cudaIpcEventHandle_t handle;
- std::memcpy(&handle, handle_bytes, handle_size);
- new (&self->cuda_event) at::cuda::CUDAEvent(device_index, &handle);
+ std::memcpy(&handle, handle_string.c_str(), handle_string.size());
+ new (&self->cuda_event) at::cuda::CUDAEvent(device.index(), &handle);
return (PyObject *)ptr.release();
END_HANDLE_TH_ERRORS
static PyObject * THCPEvent_get_device(THCPEvent *self) {
HANDLE_TH_ERRORS
- return THPUtils_packInt64(self->cuda_event.device_index());
+ // device() is nullopt until the event is created; surface that as
+ // Python None instead of a bogus integer index.
+ at::optional<at::Device> device = self->cuda_event.device();
+ if (!device) {
+ Py_RETURN_NONE;
+ }
+ return THPDevice_New(device.value());
END_HANDLE_TH_ERRORS
}
static PyMethodDef THCPEvent_methods[] = {
{(char*)"from_ipc_handle", (PyCFunction)THCPEvent_from_ipc_handle,
- METH_CLASS | METH_VARARGS, nullptr},
+ METH_CLASS | METH_VARARGS | METH_KEYWORDS, nullptr},
{(char*)"record", (PyCFunction)THCPEvent_record, METH_O, nullptr},
{(char*)"wait", (PyCFunction)THCPEvent_wait, METH_O, nullptr},
{(char*)"query", (PyCFunction)THCPEvent_query, METH_NOARGS, nullptr},
#include <torch/csrc/cuda/Stream.h>
-
-#include <torch/csrc/THP.h>
#include <torch/csrc/cuda/Module.h>
+#include <torch/csrc/Device.h>
+#include <torch/csrc/THP.h>
#include <c10/cuda/CUDAGuard.h>
static PyObject * THCPStream_get_device(THCPStream *self) {
HANDLE_TH_ERRORS
- return THPUtils_packInt64(self->cuda_stream.device_index());
+ // Return a torch.device ('cuda:<index>') rather than a bare int, to
+ // match the new Stream/Event .device API.
+ return THPDevice_New(self->cuda_stream.device());
END_HANDLE_TH_ERRORS
}
return ctypes.c_void_p(self.cuda_event)
def __repr__(self):
+ # cuda_event is a null handle until the event is first recorded, so
+ # guard before formatting its address.
- return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
+ if self.cuda_event:
+ return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
+ else:
+ return '<torch.cuda.Event uninitialized>'