From: Gregory Chanan Date: Thu, 11 Apr 2019 20:22:49 +0000 (-0700) Subject: Materialize a non-default device for C2 legacy storage. (#18605) X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~264 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b6ee83a5b4a9706f6abde011aea158a07d4d76f4;p=platform%2Fupstream%2Fpytorch.git Materialize a non-default device for C2 legacy storage. (#18605) Summary: It's not intended that Storages have 'default' CUDA devices, but this is allowable via the Storage::create_legacy codepath. This also messes with device_caching, because the initial cache is obtained from the Storage, which may have a 'default' device. Instead, we materialize a device by allocating 0 bytes via the allocator. Pull Request resolved: https://github.com/pytorch/pytorch/pull/18605 Differential Revision: D14680620 Pulled By: gchanan fbshipit-source-id: 6d43383d836e90beaf12bfe37c3f0506843f5432 --- diff --git a/c10/core/Storage.h b/c10/core/Storage.h index 82d8b31..6d86119 100644 --- a/c10/core/Storage.h +++ b/c10/core/Storage.h @@ -41,11 +41,12 @@ struct C10_API Storage { // that can be temporarily created with Caffe2 APIs. See the note on top of // TensorImpl.h for details. static Storage create_legacy(at::Device device, caffe2::TypeMeta data_type) { + auto allocator = GetAllocator(device.type()); return Storage(c10::make_intrusive<StorageImpl>( data_type, 0, - at::DataPtr(nullptr, device), - GetAllocator(device.type()), + allocator->allocate(0), // materialize a non-default Device. 
+ allocator, true)); } diff --git a/caffe2/core/context_gpu.cu b/caffe2/core/context_gpu.cu index d7f7fc6..0bd259e 100644 --- a/caffe2/core/context_gpu.cu +++ b/caffe2/core/context_gpu.cu @@ -485,14 +485,18 @@ struct DefaultCUDAAllocator final : public at::Allocator { } switch (g_cuda_memory_pool_type) { case CudaMemoryPoolType::NONE: - CUDA_ENFORCE(cudaMalloc(&ptr, nbytes)); + if (nbytes != 0) { + CUDA_ENFORCE(cudaMalloc(&ptr, nbytes)); + } if (FLAGS_caffe2_gpu_memory_tracking) { g_size_map[ptr] = nbytes; g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice(); } return {ptr, ptr, &Delete, at::Device(CUDA, CaffeCudaGetDevice())}; case CudaMemoryPoolType::CUB: - CUDA_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes)); + if (nbytes != 0) { + CUDA_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes)); + } g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice(); VLOG(2) << "CUB allocating pointer " << ptr << " on device " << CaffeCudaGetDevice();