From bb8ee2de0f5dd40bf10f7ea51ee697c9b39d0f6c Mon Sep 17 00:00:00 2001
From: Sebastian Messmer
Date: Thu, 13 Dec 2018 18:38:55 -0800
Subject: [PATCH] Move TensorImpl::CopyFrom to caffe2::Tensor (2/2) (#14858)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14858

This diff doesn't change any logic; it just takes the existing code and moves
it to caffe2::Tensor.

Reviewed By: ezyang

Differential Revision: D13365817

fbshipit-source-id: bc73b27a793602cb14200dcdf357aa63233da43c
---
 c10/core/TensorImpl.h | 74 ---------------------------------------------------
 caffe2/core/tensor.h  | 74 +++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 72 insertions(+), 76 deletions(-)

diff --git a/c10/core/TensorImpl.h b/c10/core/TensorImpl.h
index 310900c..8c4be48 100644
--- a/c10/core/TensorImpl.h
+++ b/c10/core/TensorImpl.h
@@ -809,80 +809,6 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   }
 
   /**
-   * @brief Copies the data from a source tensor, with a context provided to
-   * carry out the underlying memcpy operation. This method respects
-   * caffe2_keep_on_shrink.
-   *
-   * After CopyFrom, this function guarantees that the destination tensor will
-   * have the same initialization state and dtype as src. This function
-   * preserves the DeviceType of the source tensor (so, e.g., if you allocate
-   * a tensor on CPU and then CopyFrom a CUDA tensor, that will do a
-   * CUDA-to-CPU transfer).
-   *
-   * The 'async' parameter triggers an async copy for CUDA tensors.
-   */
-  void CopyFrom(const TensorImpl& src, bool async = false) {
-    AT_ASSERT(!is_variable());
-    AT_ASSERTM(
-        src.is_contiguous(),
-        "Right now only copy of contiguous source Tensor is supported.");
-    AT_ASSERTM(
-        src.storage_initialized(),
-        "Cannot copy from an uninitialized Tensor");
-
-    if (&src == this) {
-      return;
-    }
-
-    // Test if we need to allocate a new storage.
-    // Uninitialized storages are guaranteed to be uniquely owned,
-    // so we don't need to swap in this case.
-    // If the dtype changed, we need to reallocate storage.
-    if (dtype() != src.dtype()) {
-      // NB: copy preserves device_type.
-      // This storage will get initialized by the mutable_data call below.
-      set_storage(at::Storage(device_type(), src.dtype()));
-    }
-    Resize(src.sizes());
-
-    if (numel() > 0) {
-      if (dtype().copy()) {
-        AT_ASSERTM(
-            device_type() == DeviceType::CPU,
-            "In CopyFrom source and dest tensors must both be CPU for "
-            "non-POD copy, but dest tensor was ",
-            device_type());
-        AT_ASSERTM(
-            src.device_type() == DeviceType::CPU,
-            "In CopyFrom source and dest tensors must both be CPU for "
-            "non-POD copy, but src tensor was ",
-            src.device_type());
-        dtype().copy()(src.data(), raw_mutable_data(data_type_), numel());
-      } else {
-        // The following copy uses the current (thread-local) stream for copying
-        // and also takes the GPU id from the device() field passed in.
-        //
-        // TODO: Potentially more enforcements are necessary to avoid accidental
-        // switch to sync copy if the currently set device is wrong.
-        //
-        // Specifically, we might need to switch to a different context device
-        // here explicitly to avoid relying on the user synchronizing things
-        // properly.
-        //
-        // note: raw_mutable_data initializes the device here
-        void* new_data = raw_mutable_data(dtype());
-        CopyBytes(
-            numel() * itemsize(),
-            src.data(),
-            src.device(),
-            new_data,
-            device(),
-            async);
-      }
-    }
-  }
-
-  /**
    * @brief Extends the outer-most dimension of this tensor by num elements,
    * preserving the existing data.
   *
diff --git a/caffe2/core/tensor.h b/caffe2/core/tensor.h
index c12eb81..a91e854 100644
--- a/caffe2/core/tensor.h
+++ b/caffe2/core/tensor.h
@@ -104,8 +104,78 @@ class CAFFE2_API Tensor final {
     return impl_.get()->GetDevice();
   }
 
-  void CopyFrom(const Tensor& src, bool async = false) const {
-    impl_.get()->CopyFrom(*src.impl_, async);
+  /**
+   * @brief Copies the data from a source tensor, with a context provided to
+   * carry out the underlying memcpy operation. This method respects
+   * caffe2_keep_on_shrink.
+   *
+   * After CopyFrom, this function guarantees that the destination tensor will
+   * have the same initialization state and dtype as src. This function
+   * preserves the DeviceType of the source tensor (so, e.g., if you allocate
+   * a tensor on CPU and then CopyFrom a CUDA tensor, that will do a
+   * CUDA-to-CPU transfer).
+   *
+   * The 'async' parameter triggers an async copy for CUDA tensors.
+   */
+  void CopyFrom(const Tensor& src, bool async = false) {
+    AT_ASSERT(!impl_->is_variable());
+    AT_ASSERTM(
+        src.impl_->is_contiguous(),
+        "Right now only copy of contiguous source Tensor is supported.");
+    AT_ASSERTM(
+        src.impl_->storage_initialized(),
+        "Cannot copy from an uninitialized Tensor");
+
+    if (src.impl_.get() == impl_.get()) {
+      return;
+    }
+
+    // Test if we need to allocate a new storage.
+    // Uninitialized storages are guaranteed to be uniquely owned,
+    // so we don't need to swap in this case.
+    // If the dtype changed, we need to reallocate storage.
+    if (impl_->dtype() != src.impl_->dtype()) {
+      // NB: copy preserves device_type.
+      // This storage will get initialized by the mutable_data call below.
+      impl_->set_storage(at::Storage(impl_->device_type(), src.impl_->dtype()));
+    }
+    impl_->Resize(src.impl_->sizes());
+
+    if (impl_->numel() > 0) {
+      if (impl_->dtype().copy()) {
+        AT_ASSERTM(
+            impl_->device_type() == ::at::DeviceType::CPU,
+            "In CopyFrom source and dest tensors must both be CPU for "
+            "non-POD copy, but dest tensor was ",
+            impl_->device_type());
+        AT_ASSERTM(
+            src.impl_->device_type() == ::at::DeviceType::CPU,
+            "In CopyFrom source and dest tensors must both be CPU for "
+            "non-POD copy, but src tensor was ",
+            src.impl_->device_type());
+        impl_->dtype().copy()(src.impl_->data(), impl_->raw_mutable_data(impl_->dtype()), impl_->numel());
+      } else {
+        // The following copy uses the current (thread-local) stream for copying
+        // and also takes the GPU id from the device() field passed in.
+        //
+        // TODO: Potentially more enforcements are necessary to avoid accidental
+        // switch to sync copy if the currently set device is wrong.
+        //
+        // Specifically, we might need to switch to a different context device
+        // here explicitly to avoid relying on the user synchronizing things
+        // properly.
+        //
+        // note: raw_mutable_data initializes the device here
+        void* new_data = impl_->raw_mutable_data(impl_->dtype());
+        at::CopyBytes(
+            impl_->numel() * impl_->itemsize(),
+            src.impl_->data(),
+            src.impl_->device(),
+            new_data,
+            impl_->device(),
+            async);
+      }
+    }
   }
 
   /**
-- 
2.7.4
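
For reviewers who want to see the moved API in action, here is a minimal usage
sketch (not part of the patch; the function and variable names are illustrative,
and it assumes a caffe2 build of this era where caffe2::CPU and the
Tensor(dims, DeviceType) constructor are available):

    #include <vector>

    #include "caffe2/core/tensor.h"

    // Hypothetical example, not from the patch: exercise the moved
    // caffe2::Tensor::CopyFrom on two CPU tensors.
    void CopyFromExample() {
      // Source tensor: calling mutable_data<float>() sets the dtype and
      // initializes the storage, both of which CopyFrom asserts on.
      std::vector<int64_t> dims{4, 4};
      caffe2::Tensor src(dims, caffe2::CPU);
      float* src_data = src.mutable_data<float>();
      for (int64_t i = 0; i < 16; ++i) {  // 16 = 4 * 4 elements
        src_data[i] = static_cast<float>(i);
      }

      // The destination keeps its own DeviceType (CPU here); after the call
      // it has the same sizes, dtype, and initialization state as src.
      caffe2::Tensor dst(caffe2::CPU);
      dst.CopyFrom(src);

      // For CUDA sources, async=true requests an asynchronous copy on the
      // current (thread-local) stream, e.g. dst.CopyFrom(cuda_src, true).
    }

Note the design choice visible in the diff: the logic now lives in
caffe2::Tensor, which dereferences impl_ explicitly, rather than in
c10::TensorImpl, keeping caffe2-specific copy semantics out of the shared
TensorImpl core.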