// Create a PyThreadState, but release the GIL. This lets pybind11::gil_scoped_acquire calls
// inside thread_main acquire the GIL without having to create a new
// PyThreadState each time.
-#ifdef IS_PYTHON_3_9_PLUS
+#if defined(IS_PYTHON_3_9_PLUS) || defined(USE_DEPLOY)
auto gil = std::make_unique<pybind11::gil_scoped_acquire>();
#else
pybind11::gil_scoped_acquire gil;
decrement_non_reentrant_thread_count();
}
-#ifdef IS_PYTHON_3_9_PLUS
+#if defined(IS_PYTHON_3_9_PLUS) || defined(USE_DEPLOY)
// Do not call PyEval_RestoreThread, PyThreadState_[Clear|DeleteCurrent] if runtime is finalizing
if (!Py_IsInitialized()) {
no_gil.disarm();
+ // TODO: call disarm rather than leak gil_scoped_acquire once PyThreadState_Clear can safely be called during finalization
+ // NOTE: deploy.cpp calls `PyInterpreterState_Delete` to destruct PyThreadState, so avoid use-after-free here.
gil.release();
}
#endif
I.self.attr("import_module")({"uses_distributed"});
}
}
+
+// Verify that autograd runs independently in each sub-interpreter and,
+// given identical (seeded) inputs, produces identical gradients.
+TEST(TorchpyTest, Autograd) {
+  torch::deploy::InterpreterManager m(2);
+  m.register_module_source("autograd_test", R"PYTHON(
+import torch
+
+# Seed the RNG so both interpreters generate identical w/b; without this,
+# torch.randn differs per interpreter and the gradients could never match.
+torch.manual_seed(0)
+
+x = torch.ones(5)  # input tensor
+y = torch.zeros(3)  # expected output
+w = torch.randn(5, 3, requires_grad=True)
+b = torch.randn(3, requires_grad=True)
+z = torch.matmul(x, w)+b
+loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)
+loss.backward()
+result = w.grad
+)PYTHON");
+  at::Tensor w_grad0, w_grad1;
+  {
+    // Run the module in the first interpreter and fetch its gradient.
+    auto I = m.all_instances()[0].acquire_session();
+    w_grad0 = I.global("autograd_test", "result").toIValue().toTensor();
+  }
+  {
+    // Same computation in the second, independent interpreter.
+    auto I = m.all_instances()[1].acquire_session();
+    w_grad1 = I.global("autograd_test", "result").toIValue().toTensor();
+  }
+  // With identical seeds the two interpreters must agree exactly.
+  EXPECT_TRUE(w_grad0.equal(w_grad1));
+}