Materialize a non-default device for C2 legacy storage. (#18605)
authorGregory Chanan <gchanan@fb.com>
Thu, 11 Apr 2019 20:22:49 +0000 (13:22 -0700)
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>
Thu, 11 Apr 2019 20:50:41 +0000 (13:50 -0700)
Summary:
It's not intended that Storages have 'default' CUDA devices, but this is allowable via the Storage::create_legacy codepath.

This also messes with device_caching, because the initial cache is obtained from the Storage, which may have a 'default' device.

Instead, we materialize a device by allocating 0 bytes via the allocator.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/18605

Differential Revision: D14680620

Pulled By: gchanan

fbshipit-source-id: 6d43383d836e90beaf12bfe37c3f0506843f5432

c10/core/Storage.h
caffe2/core/context_gpu.cu

index 82d8b31..6d86119 100644 (file)
@@ -41,11 +41,12 @@ struct C10_API Storage {
   // that can be temporarily created with Caffe2 APIs. See the note on top of
   // TensorImpl.h for details.
   static Storage create_legacy(at::Device device, caffe2::TypeMeta data_type) {
+    auto allocator = GetAllocator(device.type());
     return Storage(c10::make_intrusive<StorageImpl>(
             data_type,
             0,
-            at::DataPtr(nullptr, device),
-            GetAllocator(device.type()),
+            allocator->allocate(0), // materialize a non-default Device.
+            allocator,
             true));
   }
 
index d7f7fc6..0bd259e 100644 (file)
@@ -485,14 +485,18 @@ struct DefaultCUDAAllocator final : public at::Allocator {
     }
     switch (g_cuda_memory_pool_type) {
       case CudaMemoryPoolType::NONE:
-        CUDA_ENFORCE(cudaMalloc(&ptr, nbytes));
+        if (nbytes != 0) {
+          CUDA_ENFORCE(cudaMalloc(&ptr, nbytes));
+        }
         if (FLAGS_caffe2_gpu_memory_tracking) {
           g_size_map[ptr] = nbytes;
           g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
         }
         return {ptr, ptr, &Delete, at::Device(CUDA, CaffeCudaGetDevice())};
       case CudaMemoryPoolType::CUB:
-        CUDA_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
+        if (nbytes != 0) {
+          CUDA_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
+        }
         g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
         VLOG(2) << "CUB allocating pointer " << ptr << " on device "
                 << CaffeCudaGetDevice();