Adding pin_memory kwarg to zeros, ones, empty, ... tensor constructors. (#18455)

author Vitaly Fedyunin <vitalyf@fb.com>
Tue, 2 Apr 2019 15:44:27 +0000 (08:44 -0700)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Tue, 2 Apr 2019 15:48:19 +0000 (08:48 -0700)
diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp

index f4f0f42..1cdcb42 100644 (file)
--- a/aten/src/ATen/native/TensorFactories.cpp
+++ b/aten/src/ATen/native/TensorFactories.cpp
@@ -18,6 +18,7 @@
  #include <c10/core/TensorOptions.h>
  #include <TH/THRandom.h>
  #include <TH/THGenerator.hpp>
+#include <ATen/detail/CUDAHooksInterface.h>
  #include <c10/util/Exception.h>
  
  #include <algorithm>
@@ -93,7 +94,13 @@ Tensor empty_cpu(IntArrayRef size, const TensorOptions& options) {
    AT_ASSERT(!options.is_variable());  // is_variable should have been 'unpacked'  // TODO: remove this when Variable and Tensor are merged
    check_size_nonnegative(size);
  
-  auto* allocator = at::getCPUAllocator();
+  c10::Allocator* allocator;
+  if (options.pinned_memory()) {
+    allocator = detail::getCUDAHooks().getPinnedMemoryAllocator();
+  } else {
+    allocator = at::getCPUAllocator();
+  }
+
    int64_t nelements = prod_intlist(size);
    auto dtype = options.dtype();
    auto storage_impl = c10::make_intrusive<StorageImpl>(
diff --git a/aten/src/ATen/native/cuda/TensorFactories.cu b/aten/src/ATen/native/cuda/TensorFactories.cu

index c9bb377..64d30a7 100644 (file)
--- a/aten/src/ATen/native/cuda/TensorFactories.cu
+++ b/aten/src/ATen/native/cuda/TensorFactories.cu
@@ -46,6 +46,7 @@ Tensor& eye_out_cuda(Tensor& result, int64_t n, int64_t m) {
  Tensor empty_cuda(IntArrayRef size, const TensorOptions& options) {
    AT_ASSERT(options.backend() == at::Backend::CUDA);
    AT_ASSERT(!options.is_variable());  // is_variable should have been 'unpacked'  // TODO: remove this when Variable and Tensor are merged
+  AT_CHECK(!options.pinned_memory(), "Only dense CPU tensors can be pinned");
    check_size_nonnegative(size);
  
    auto* allocator = at::cuda::getCUDADeviceAllocator();
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml

index f8cd135..dc6153b 100644 (file)
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -56,7 +56,7 @@
    dispatch:
      CUDA: _cudnn_rnn_backward
  
-- func: _cudnn_init_dropout_state(float dropout, bool train, int dropout_seed, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: _cudnn_init_dropout_state(float dropout, bool train, int dropout_seed, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
    dispatch:
      CUDA: _cudnn_init_dropout_state
@@ -230,13 +230,13 @@
  - func: any(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
    matches_jit_signature: True
  
-- func: arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: arange(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: arange(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: arange(Scalar start, Scalar end, Scalar step, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: arange(Scalar start, Scalar end, Scalar step, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: arange(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -333,10 +333,10 @@
      CPU: baddbmm_out_cpu
      CUDA: baddbmm_out_cuda
  
-- func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: bartlett_window(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: bartlett_window(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor
@@ -396,10 +396,10 @@
      CPU: _bincount_cpu
      CUDA: _bincount_cuda
  
-- func: blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: blackman_window(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: blackman_window(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: bmm(Tensor self, Tensor mat2) -> Tensor
@@ -839,7 +839,7 @@
      CPU: _embedding_bag_dense_backward_cpu
      CUDA: _embedding_bag_dense_backward_cuda
  
-- func: empty(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: empty(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
    cpu_half: True
    cpu_bool: True
@@ -867,11 +867,11 @@
    matches_jit_signature: True
    device_guard: False
  
-- func: empty_like(Tensor self, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: empty_like(Tensor self, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
    device_guard: False
  
-- func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    cpu_half: True
    matches_jit_signature: True
    dispatch:
@@ -956,10 +956,10 @@
    variants: method  # This is method-only to match the previous tensor API. In the future we could make this a function too.
    device_guard: False
  
-- func: eye(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: eye(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: eye(int n, int m, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: eye(int n, int m, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: eye(int n, *, Tensor(a!) out) -> Tensor(a!)
@@ -1003,7 +1003,7 @@
      CPU: _floor_out_cpu
      CUDA: _floor_out_cuda
  
-- func: full(int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: full(int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: full(int[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)
@@ -1012,7 +1012,7 @@
  - func: full_like(Tensor self, Scalar fill_value) -> Tensor
    matches_jit_signature: True
  
-- func: full_like(Tensor self, Scalar fill_value, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: full_like(Tensor self, Scalar fill_value, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
  
  # NOTE [ grid_sampler Native Functions ]
@@ -1051,22 +1051,22 @@
      CPU: grid_sampler_3d_backward_cpu
      CUDA: grid_sampler_3d_backward_cuda
  
-- func: hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: hann_window(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: hann_window(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: hamming_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: hamming_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: hamming_window(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: hamming_window(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: hamming_window(int window_length, bool periodic, float alpha, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: hamming_window(int window_length, bool periodic, float alpha, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: hamming_window(int window_length, bool periodic, float alpha, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: hamming_window(int window_length, bool periodic, float alpha, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: hinge_embedding_loss(Tensor self, Tensor target, float margin=1.0, int reduction=Mean) -> Tensor
@@ -1238,7 +1238,7 @@
  - func: fbgemm_is_cpu_supported() -> bool
    matches_jit_signature: True
  
-- func: linspace(Scalar start, Scalar end, int steps=100, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: linspace(Scalar start, Scalar end, int steps=100, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: linspace(Scalar start, Scalar end, int steps=100, *, Tensor(a!) out) -> Tensor(a!)
@@ -1323,7 +1323,7 @@
    matches_jit_signature: True
    variants: function, method
  
-- func: logspace(Scalar start, Scalar end, int steps=100, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: logspace(Scalar start, Scalar end, int steps=100, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: logspace(Scalar start, Scalar end, int steps=100, *, Tensor(a!) out) -> Tensor(a!)
@@ -1669,7 +1669,7 @@
    matches_jit_signature: True
    variants: function
  
-- func: ones(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: ones(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: ones(int[] size, *, Tensor(a!) out) -> Tensor(a!)
@@ -1678,7 +1678,7 @@
  - func: ones_like(Tensor self) -> Tensor
    matches_jit_signature: True
  
-- func: ones_like(Tensor self, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: ones_like(Tensor self, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
  
  - func: pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor
@@ -1717,13 +1717,13 @@
    matches_jit_signature: True
    variants: function, method
  
-- func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: rand(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: rand(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: rand(int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: rand(int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  
  - func: rand(int[] size, *, Tensor(a!) out) -> Tensor(a!)
    matches_jit_signature: True
@@ -1734,18 +1734,18 @@
  - func: rand_like(Tensor self) -> Tensor
    matches_jit_signature: True
  
-- func: rand_like(Tensor self, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: rand_like(Tensor self, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
  
-- func: randint(int high, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: randint(int high, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: randint(int high, int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: randint(int high, int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  
-- func: randint(int low, int high, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: randint(int low, int high, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: randint(int low, int high, int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: randint(int low, int high, int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  
  - func: randint(int high, int[] size, *, Tensor(a!) out) -> Tensor(a!)
    matches_jit_signature: True
@@ -1765,16 +1765,16 @@
  - func: randint_like(Tensor self, int low, int high) -> Tensor
    matches_jit_signature: True
  
-- func: randint_like(Tensor self, int high, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: randint_like(Tensor self, int high, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
  
-- func: randint_like(Tensor self, int low, int high, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: randint_like(Tensor self, int low, int high, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
  
-- func: randn(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: randn(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: randn(int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: randn(int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  
  - func: randn(int[] size, *, Tensor(a!) out) -> Tensor(a!)
    matches_jit_signature: True
@@ -1785,13 +1785,13 @@
  - func: randn_like(Tensor self) -> Tensor
    matches_jit_signature: True
  
-- func: randn_like(Tensor self, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: randn_like(Tensor self, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
  
-- func: randperm(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: randperm(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: randperm(int n, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: randperm(int n, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  
  - func: randperm(int n, *, Tensor(a!) out) -> Tensor(a!)
    matches_jit_signature: True
@@ -1802,10 +1802,10 @@
      CPU: randperm_out_cpu
      CUDA: randperm_out_cuda
  
-- func: range(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: range(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: range(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: range(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: range(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
@@ -2422,7 +2422,7 @@
    matches_jit_signature: True
    variants: function
  
-- func: zeros(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: zeros(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
  - func: zeros(int[] size, *, Tensor(a!) out) -> Tensor(a!)
@@ -2431,7 +2431,7 @@
  - func: zeros_like(Tensor self) -> Tensor
    matches_jit_signature: True
  
-- func: zeros_like(Tensor self, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: zeros_like(Tensor self, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
  
  - func: _standard_gamma_grad(Tensor self, Tensor output) -> Tensor
@@ -2759,26 +2759,26 @@
  
  # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
  # the default would never make sense.
-- func: sparse_coo_tensor(int[] size, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: sparse_coo_tensor(int[] size, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
  
-- func: sparse_coo_tensor(Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: sparse_coo_tensor(Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: sparse_coo_tensor(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: sparse_coo_tensor(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: _sparse_coo_tensor_unsafe(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None) -> Tensor
+- func: _sparse_coo_tensor_unsafe(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    matches_jit_signature: True
  
-- func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
    dispatch:
      SparseCPU: new_with_dims_sparse
      SparseCUDA: new_with_dims_sparse
    requires_tensor: True
  
-- func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, int[] size, Tensor indices, Tensor values, *, ScalarType dtype, Layout layout, Device device) -> Tensor
+- func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, int[] size, Tensor indices, Tensor values, *, ScalarType dtype, Layout layout, Device device, bool pin_memory) -> Tensor
    matches_jit_signature: True
    dispatch:
      SparseCPU: new_with_dims_and_tensor_sparse
@@ -2983,7 +2983,7 @@
  # to(Device) must not exist because all constructors of Device also works for
  # TensorOptions. Otherwise, an ambiguity error is thrown.
  # See NOTE [ TensorOptions Constructors ].
-- func: to(Tensor self, *, ScalarType dtype, Layout layout, Device device, bool non_blocking=False, bool copy=False) -> Tensor
+- func: to(Tensor self, *, ScalarType dtype, Layout layout, Device device, bool pin_memory, bool non_blocking=False, bool copy=False) -> Tensor
    matches_jit_signature: True
    variants: method
    device_guard: False
diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp

index d3278c6..67f3e22 100644 (file)
--- a/aten/src/ATen/native/sparse/SparseTensor.cpp
+++ b/aten/src/ATen/native/sparse/SparseTensor.cpp
@@ -108,6 +108,7 @@ SparseTensor new_with_dims_and_tensor_sparse(
  
  /** Empty init **/
  Tensor empty_sparse(IntArrayRef size, const TensorOptions& options) {
+  AT_CHECK(!options.pinned_memory(), "Only dense CPU tensors can be pinned");
    return new_with_dims_sparse(size.size(), 0, size, options);
  }
  
diff --git a/aten/src/ATen/native_parse.py b/aten/src/ATen/native_parse.py

index af59313..e93d32e 100644 (file)
--- a/aten/src/ATen/native_parse.py
+++ b/aten/src/ATen/native_parse.py
@@ -202,19 +202,15 @@ def parse_arguments(args, func_variants, declaration, func_return):
              {'name': 'dtype', 'type': 'ScalarType', 'is_nullable': False, 'annotation': None},
              {'name': 'layout', 'type': 'Layout', 'is_nullable': False, 'annotation': None},
              {'name': 'device', 'type': 'Device', 'is_nullable': False, 'annotation': None},
+            {'name': 'pin_memory', 'type': 'bool', 'is_nullable': False, 'annotation': None},
          ]
      ]
      supported_topt_arguments.append(copy.deepcopy(supported_topt_arguments[0]))
-    supported_topt_arguments[1][0]['kwarg_only'] = True
-    supported_topt_arguments[1][1]['kwarg_only'] = True
-    supported_topt_arguments[1][2]['kwarg_only'] = True
+    for arg in supported_topt_arguments[1]:
+        arg.update({'kwarg_only': True})
      supported_topt_arguments.append(copy.deepcopy(supported_topt_arguments[1]))
-    supported_topt_arguments[2][0]['default'] = 'c10::nullopt'
-    supported_topt_arguments[2][1]['default'] = 'c10::nullopt'
-    supported_topt_arguments[2][2]['default'] = 'c10::nullopt'
-    supported_topt_arguments[2][0]['is_nullable'] = True
-    supported_topt_arguments[2][1]['is_nullable'] = True
-    supported_topt_arguments[2][2]['is_nullable'] = True
+    for arg in supported_topt_arguments[2]:
+        arg.update({'default': 'c10::nullopt', 'is_nullable': True})
  
      corresponding_topts = [
          {'type': 'TensorOptions', 'name': 'options', 'is_nullable': False, 'annotation': None},
@@ -227,29 +223,29 @@ def parse_arguments(args, func_variants, declaration, func_return):
      def check_topt_representation(topt_representation):
          for idx, supported_topt in enumerate(supported_topt_arguments):
              matches = True
-            matches = matches and topt_representation[0] == supported_topt[0]
-            matches = matches and topt_representation[1] == supported_topt[1]
-            matches = matches and topt_representation[2] == supported_topt[2]
+            for i, topt in enumerate(supported_topt):
+                matches = matches and topt_representation[i] == topt
              if matches:
                  return corresponding_topts[idx]
          return None
  
      def is_tensor_option(argument):
-        return argument['name'] in ['dtype', 'layout', 'device']
+        return argument['name'] in ['dtype', 'layout', 'device', 'pin_memory']
  
      new_arguments = []
      idx = 0
      while idx < len(arguments):
          argument = arguments[idx]
-        if is_tensor_option(argument) and len(arguments) - idx >= 3:
+        number_of_arguments = len(supported_topt_arguments[0])
+        if is_tensor_option(argument) and len(arguments) - idx >= number_of_arguments:
              topt_representation = []
-            for i in range(3):
+            for i in range(number_of_arguments):
                  argument = arguments[idx]
                  if not is_tensor_option(argument):
                      break
                  topt_representation.append(argument)
                  idx += 1
-            if len(topt_representation) == 3:
+            if len(topt_representation) == number_of_arguments:
                  merged_argument = check_topt_representation(topt_representation)
                  assert merged_argument, \
                      "Unsupported combination of TensorOptions {}, the only currently supported combinations are {}"\
diff --git a/c10/core/TensorOptions.h b/c10/core/TensorOptions.h

index 1e7b017..0105b17 100644 (file)
--- a/c10/core/TensorOptions.h
+++ b/c10/core/TensorOptions.h
@@ -100,11 +100,13 @@ struct C10_API TensorOptions {
    TensorOptions()
      : requires_grad_(false)
      , is_variable_(false)
+    , pinned_memory_(false)
      , has_device_(false)
      , has_dtype_(false)
      , has_layout_(false)
      , has_requires_grad_(false)
      , has_is_variable_(false)
+    , has_pinned_memory_(false)
      {}
  
    /// Constructs a `TensorOptions` object with the given layout.
@@ -232,6 +234,14 @@ struct C10_API TensorOptions {
      return r;
    }
  
+
+  /// Sets the `pinned_memory` property on the `TensorOptions`.
+  C10_NODISCARD TensorOptions pinned_memory(c10::optional<bool> pinned_memory) const noexcept {
+    TensorOptions r = *this;
+    r.set_pinned_memory(pinned_memory);
+    return r;
+  }
+
    /// Returns the device of the `TensorOptions`.
    Device device() const noexcept {
      return has_device_ ? device_ : Device(kCPU);
@@ -312,12 +322,31 @@ struct C10_API TensorOptions {
      return has_is_variable_;
    }
  
+
+  /// Returns the `pinned_memory` property of the `TensorOptions`.
+  bool pinned_memory() const noexcept {
+    return has_pinned_memory_ ? pinned_memory_ : false;
+  }
+
+  /// Returns whether the `pinned_memory` is specified.
+  bool has_pinned_memory() const noexcept {
+    return has_pinned_memory_;
+  }
+
+
    /// Returns the `is_variable` property of the `TensorOptions`, or
    /// `c10::nullopt` if `is_variable` is not specified.
    c10::optional<bool> is_variable_opt() const noexcept {
      return has_is_variable_ ? c10::make_optional(is_variable_) : c10::nullopt;
    }
  
+
+  /// Returns the `pinned_memory` property of the `TensorOptions`, or
+  /// `c10::nullopt` if `pinned_memory` is not specified.
+  c10::optional<bool> pinned_memory_opt() const noexcept {
+    return has_pinned_memory_ ? c10::make_optional(pinned_memory_) : c10::nullopt;
+  }
+
    // Resolves the ATen backend specified by the current construction axes.
    Backend backend() const noexcept {
      return at::tensorTypeIdToBackend(computeTensorTypeId());
@@ -438,6 +467,16 @@ struct C10_API TensorOptions {
      }
    }
  
+  /// Mutably set the `pinned_memory` property of `TensorOptions`.
+  void set_pinned_memory(c10::optional<bool> pinned_memory) & noexcept {
+    if (pinned_memory) {
+      pinned_memory_ = *pinned_memory;
+      has_pinned_memory_ = true;
+    } else {
+      has_pinned_memory_ = false;
+    }
+  }
+
    // WARNING: If you edit TensorOptions to add more options, you
    // must adjust the implementation of Tensor::options
  
@@ -453,12 +492,15 @@ struct C10_API TensorOptions {
  
    bool requires_grad_     : 1;
    bool is_variable_       : 1;
+  bool pinned_memory_     : 1;
+
  
    bool has_device_        : 1;
    bool has_dtype_         : 1;
    bool has_layout_        : 1;
    bool has_requires_grad_ : 1;
    bool has_is_variable_   : 1;
+  bool has_pinned_memory_ : 1;
  };
  
  // We should aspire to fit in one machine-size word; but a size greater than two
diff --git a/test/test_torch.py b/test/test_torch.py

index 855e8f9..5de9045 100644 (file)
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -9815,6 +9815,40 @@ tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
          self.assertEqual(pinned, x)
          self.assertNotEqual(pinned.data_ptr(), x.data_ptr())
  
+    @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
+    def test_pin_memory_from_constructor(self):
+
+        def _get_like(t, **kwargs):
+            return [
+                torch.rand_like(t, **kwargs),
+                torch.randn_like(t, **kwargs),
+                torch.empty_like(t, **kwargs),
+                torch.full_like(t, 4, **kwargs),
+                torch.zeros_like(t, **kwargs),
+                torch.ones_like(t, **kwargs),
+            ]
+
+        def _get_tensors(**kwargs):
+            return [
+                torch.tensor([10,11], **kwargs),
+                torch.randn(3, 5, **kwargs),
+                torch.rand(3, **kwargs),
+                # torch.randint(3,5, **kwargs), // unsupported
+                torch.zeros(3, **kwargs),
+                torch.randperm(3, **kwargs),
+                torch.empty(6, **kwargs),
+                torch.ones(6, **kwargs),
+                torch.eye(6, **kwargs),
+                torch.arange(3, 5, **kwargs),]
+
+        pinned_tensors = _get_tensors(pin_memory=True) + _get_like(torch.empty(5, dtype=torch.float64), pin_memory=True)
+        for x in pinned_tensors:
+            self.assertTrue(x.is_pinned())
+
+        tensors = _get_tensors() + _get_like(torch.empty(5, dtype=torch.float64, pin_memory=True))
+        for x in tensors:
+            self.assertFalse(x.is_pinned())
+
      @unittest.skipIf(not TEST_NUMPY, "Numpy not found")
      def test_numpy_unresizable(self):
          x = np.zeros((2, 2))
diff --git a/tools/autograd/gen_python_functions.py b/tools/autograd/gen_python_functions.py

index 7201f77..9f6ccf8 100644 (file)
--- a/tools/autograd/gen_python_functions.py
+++ b/tools/autograd/gen_python_functions.py
@@ -157,7 +157,8 @@ const auto options = TensorOptions()
      .dtype(${dtype})
      .device(${device})
      .layout(${layout}.layout)
-    .requires_grad(${requires_grad});
+    .requires_grad(${requires_grad})
+    .pinned_memory(${pin_memory});
  """)
  
  
@@ -429,9 +430,9 @@ def create_python_bindings(python_functions, has_self, is_module=False):
                  arg_idx += 1
  
          if 'layout' in (a['name'] for a in python_binding_arguments):
-            layout_idx, device_idx, requires_grad_idx = (arg_idx, arg_idx + 1, arg_idx + 2)
+            layout_idx, device_idx, pin_memory_idx, requires_grad_idx = (arg_idx, arg_idx + 1, arg_idx + 2, arg_idx + 3)
          else:
-            device_idx, requires_grad_idx = (arg_idx, arg_idx + 1)
+            device_idx, pin_memory_idx, requires_grad_idx = (arg_idx, arg_idx + 1, arg_idx + 2)
  
          device = None
          for arg in python_binding_arguments:
@@ -459,9 +460,11 @@ def create_python_bindings(python_functions, has_self, is_module=False):
                      has_device_bind = True
              elif arg['name'] == 'requires_grad' and arg['simple_type'] == 'bool':
                  requires_grad = parse_arg(arg, requires_grad_idx)[0]
+            elif arg['name'] == 'pin_memory' and arg['simple_type'] == 'bool':
+                pin_memory = parse_arg(arg, pin_memory_idx)[0]
              else:
                  raise RuntimeError(("found {} in python_binding_arguments but only "
-                                    "\"bool requires_grad\", \"ScalarType dtype\", \"Layout layout\", "
+                                    "\"bool pin_memory\", \"bool requires_grad\", \"ScalarType dtype\", \"Layout layout\", "
                                      "\"Device device\" are supported".format(arg)))
  
          dtype = parsed_type_args[0] if parsed_type_args else None
@@ -470,7 +473,8 @@ def create_python_bindings(python_functions, has_self, is_module=False):
                  'dtype': dtype,
                  'layout': layout,
                  'device': device,
-                'requires_grad': requires_grad
+                'requires_grad': requires_grad,
+                'pin_memory': pin_memory,
              }))
              formal_args.append('const TensorOptions & options')
              actuals.append('options')
@@ -620,6 +624,15 @@ def create_python_bindings(python_functions, has_self, is_module=False):
                  'python_default_init': py_default_device
              }
              python_binding_arguments.append(device_arg)
+            pin_memory_arg = {
+                'default': False,
+                'dynamic_type': 'bool',
+                'kwarg_only': True,
+                'name': 'pin_memory',
+                'type': 'bool',
+                'simple_type': 'bool',
+            }
+            python_binding_arguments.append(pin_memory_arg)
          if is_factory_or_like_function:
              requires_grad_arg = {
                  'default': False,
diff --git a/tools/autograd/templates/python_torch_functions.cpp b/tools/autograd/templates/python_torch_functions.cpp

index 1a0480b..49110c6 100644 (file)
--- a/tools/autograd/templates/python_torch_functions.cpp
+++ b/tools/autograd/templates/python_torch_functions.cpp
@@ -96,11 +96,11 @@ static PyObject * THPVariable_arange(PyObject* self, PyObject* args, PyObject* k
  {
    HANDLE_TH_ERRORS
    static PythonArgParser parser({
-    "arange(Scalar end, *, Tensor out=None, ScalarType dtype=None, Layout layout=torch.strided, Device device=None, bool requires_grad=False)",
-    "arange(Scalar start, Scalar end, Scalar step=1, *, Tensor out=None, ScalarType dtype=None, Layout layout=torch.strided, Device device=None, bool requires_grad=False)",
+    "arange(Scalar end, *, Tensor out=None, ScalarType dtype=None, Layout layout=torch.strided, Device device=None, bool pin_memory=False, bool requires_grad=False)",
+    "arange(Scalar start, Scalar end, Scalar step=1, *, Tensor out=None, ScalarType dtype=None, Layout layout=torch.strided, Device device=None, bool pin_memory=False, bool requires_grad=False)",
    });
  
-  ParsedArgs<8> parsed_args;
+  ParsedArgs<9> parsed_args;
    auto r = parser.parse(args, kwargs, parsed_args);
  
    if (r.idx == 0) {
@@ -112,12 +112,14 @@ static PyObject * THPVariable_arange(PyObject* self, PyObject* args, PyObject* k
            .dtype(scalarType)
            .device(r.device(4))
            .layout(r.layout(3).layout)
-          .requires_grad(r.toBool(5));
+          .requires_grad(r.toBool(6))
+          .pinned_memory(r.toBool(5));
        return wrap(dispatch_arange(end, options));
      } else {
+      AT_ASSERTM(!r.toBool(5), " `pin_memory` and `out` parameters are incompatible");
        check_out_type_matches(r.tensor(1), r.scalartype(2), r.isNone(2), r.layout(3), r.isNone(3),
                               r.device(4), r.isNone(4));
-      return wrap(dispatch_arange(r.scalar(0), r.tensor(1)).set_requires_grad(r.toBool(5)));
+      return wrap(dispatch_arange(r.scalar(0), r.tensor(1)).set_requires_grad(r.toBool(6)));
      }
    } else if (r.idx == 1) {
      if (r.isNone(3)) {
@@ -130,12 +132,14 @@ static PyObject * THPVariable_arange(PyObject* self, PyObject* args, PyObject* k
            .dtype(scalarType)
            .device(r.device(6))
            .layout(r.layout(5).layout)
-          .requires_grad(r.toBool(7));
+          .requires_grad(r.toBool(8))
+          .pinned_memory(r.toBool(7));
        return wrap(dispatch_arange(start, end, step, options));
      } else {
+      AT_ASSERTM(!r.toBool(7), " `pin_memory` and `out` parameters are incompatible");
        check_out_type_matches(r.tensor(3), r.scalartype(4), r.isNone(4), r.layout(5), r.isNone(5),
                                 r.device(6), r.isNone(6));
-      return wrap(dispatch_arange(r.scalar(0), r.scalar(1), r.scalar(2), r.tensor(3)).set_requires_grad(r.toBool(7)));
+      return wrap(dispatch_arange(r.scalar(0), r.scalar(1), r.scalar(2), r.tensor(3)).set_requires_grad(r.toBool(8)));
      }
    }
    Py_RETURN_NONE;
diff --git a/tools/jit/gen_jit_dispatch.py b/tools/jit/gen_jit_dispatch.py

index 149832b..2c62bbc 100644 (file)
--- a/tools/jit/gen_jit_dispatch.py
+++ b/tools/jit/gen_jit_dispatch.py
@@ -59,6 +59,7 @@ TYPE_MAP = {
      'int64_t?': 'int?',
      'double': 'float',
      'bool': 'bool',
+    'bool?': 'bool?',
      'Generator': 'Generator?',
  }
  
@@ -104,6 +105,7 @@ FROM_IVALUE = {
      'Tensor?[]': 'toListOfOptionalTensor({})',
      'TensorList': '{}.toTensorList()->elements()',
      'bool': '{}.toBool()',
+    'bool?': '{}.toOptional<bool>()',
      'double': '{}.toDouble()',
      'int64_t': '{}.toInt()',
      'int64_t?': '{}.toOptional<int64_t>()',
@@ -134,14 +136,16 @@ CALL_NAMESPACE_WITH_TENSOR_OPTIONS = CodeTemplate("""\
  const auto options = TensorOptions()
          .dtype(${dtype})
          .layout(${layout})
-        .device(${device});
+        .device(${device})
+        .pinned_memory(${pin_memory});
  auto result_ = torch::${name}(${args_with_tensor_options});
  """)
  CALL_METHOD_WITH_TENSOR_OPTIONS = CodeTemplate("""\
  const auto options = TensorOptions()
          .dtype(${dtype})
          .layout(${layout})
-        .device(${device});
+        .device(${device})
+        .pinned_memory(${pin_memory});;
  auto result_ = (${first}).${name}(${args_with_tensor_options});
  """)
  
@@ -243,15 +247,18 @@ def gen_jit_dispatch(declarations, out, template_path):
              dtype = args[tensor_options_arg_index]
              layout = args[tensor_options_arg_index + 1]
              device = args[tensor_options_arg_index + 2]
+            pin_memory = args[tensor_options_arg_index + 3]
              args_with_tensor_options = args[:tensor_options_arg_index] + \
-                ['options'] + args[(tensor_options_arg_index + 3):]
+                ['options'] + args[(tensor_options_arg_index + 4):]
              if is_namespace_function:
                  return CALL_NAMESPACE_WITH_TENSOR_OPTIONS.substitute(
-                    name=decl['name'], dtype=dtype, layout=layout, device=device,
+                    name=decl['name'], dtype=dtype, layout=layout,
+                    device=device, pin_memory=pin_memory,
                      args_with_tensor_options=pack_arguments(args_with_tensor_options))
              else:
                  return CALL_METHOD_WITH_TENSOR_OPTIONS.substitute(
-                    name=decl['name'], dtype=dtype, layout=layout, device=device,
+                    name=decl['name'], dtype=dtype, layout=layout,
+                    device=device, pin_memory=pin_memory,
                      args_with_tensor_options=pack_arguments(args_with_tensor_options[1:]),
                      first=args_with_tensor_options[0], num_inputs=num_inputs)
          else:
@@ -350,21 +357,19 @@ def gen_jit_dispatch(declarations, out, template_path):
              {'name': 'layout', 'simple_type': 'Layout'},
              # device is specified as an IntArrayRef of { at::Device::Type, device_id }
              {'name': 'device', 'simple_type': 'Device'},
+            # pin_memory is specified as a boolean
+            {'name': 'pin_memory', 'simple_type': 'bool'},
          ]
          # TODO: Don't repack this into TensorOptions. Needs various changes in downstream code.
          if 'default' in arg:
-            tensor_options_expansion[0]['simple_type'] += '?'
-            tensor_options_expansion[1]['simple_type'] += '?'
-            tensor_options_expansion[2]['simple_type'] += '?'
-            tensor_options_expansion[0]['default'] = 'None'
-            tensor_options_expansion[1]['default'] = 'None'
-            tensor_options_expansion[2]['default'] = 'None'
+            for el in tensor_options_expansion:
+                el['simple_type'] += '?'
+                el['default'] = 'None'
          if 'default' in arg and arg['default'] == 'at::kLong':
              tensor_options_expansion[0]['default'] = 'long'
          if 'kwarg_only' in arg and arg['kwarg_only']:
-            tensor_options_expansion[0]['kwarg_only'] = True
-            tensor_options_expansion[1]['kwarg_only'] = True
-            tensor_options_expansion[2]['kwarg_only'] = True
+            for el in tensor_options_expansion:
+                el['kwarg_only'] = True
          return tensor_options_expansion
  
      additional_jit_decls = []
diff --git a/tools/pyi/gen_pyi.py b/tools/pyi/gen_pyi.py

index cd95a25..132ba93 100644 (file)
--- a/tools/pyi/gen_pyi.py
+++ b/tools/pyi/gen_pyi.py
@@ -239,7 +239,11 @@ def generate_type_hints(fname, decls, is_tensor=False):
                  if a.get('kwarg_only', False) and render_kw_only_separator:
                      python_args.append('*')
                      render_kw_only_separator = False
-                python_args.append(arg_to_type_hint(a))
+                try:
+                    python_args.append(arg_to_type_hint(a))
+                except Exception:
+                    print("Error while processing function %s" % fname)
+                    raise
  
          if is_tensor:
              if 'self: Tensor' in python_args:
diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py

index 6b18148..55ff438 100644 (file)
--- a/torch/_tensor_docs.py
+++ b/torch/_tensor_docs.py
@@ -17,6 +17,8 @@ new_common_args = parse_kwargs("""
          Default: if None, same :class:`torch.device` as this tensor.
      requires_grad (bool, optional): If autograd should record operations on the
          returned tensor. Default: ``False``.
+    pin_memory (bool, optional): If set, returned tensor would be allocated in
+        the pinned memory. Works only for CPU tensors. Default: ``False``.
  """)
  
  add_docstr_all('new_tensor',
diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py

index 580481f..a96c4e4 100644 (file)
--- a/torch/_torch_docs.py
+++ b/torch/_torch_docs.py
@@ -63,6 +63,8 @@ factory_common_args = parse_kwargs("""
          for CPU tensor types and the current CUDA device for CUDA tensor types.
      requires_grad (bool, optional): If autograd should record operations on the
          returned tensor. Default: ``False``.
+    pin_memory (bool, optional): If set, returned tensor would be allocated in
+        the pinned memory. Works only for CPU tensors. Default: ``False``.
  """)
  
  factory_like_common_args = parse_kwargs("""
@@ -75,6 +77,8 @@ factory_like_common_args = parse_kwargs("""
          Default: if ``None``, defaults to the device of :attr:`input`.
      requires_grad (bool, optional): If autograd should record operations on the
          returned tensor. Default: ``False``.
+    pin_memory (bool, optional): If set, returned tensor would be allocated in
+        the pinned memory. Works only for CPU tensors. Default: ``False``.
  """)
  
  factory_data_common_args = parse_kwargs("""
@@ -88,6 +92,8 @@ factory_data_common_args = parse_kwargs("""
          for CPU tensor types and the current CUDA device for CUDA tensor types.
      requires_grad (bool, optional): If autograd should record operations on the
          returned tensor. Default: ``False``.
+    pin_memory (bool, optional): If set, returned tensor would be allocated in
+        the pinned memory. Works only for CPU tensors. Default: ``False``.
  """)
  
  add_docstr(torch.abs,
@@ -3964,7 +3970,7 @@ Example::
  
  add_docstr(torch.tensor,
             r"""
-tensor(data, dtype=None, device=None, requires_grad=False) -> Tensor
+tensor(data, dtype=None, device=None, requires_grad=False, pin_memory=False) -> Tensor
  
  Constructs a tensor with :attr:`data`.
  
@@ -3988,6 +3994,7 @@ Args:
      {dtype}
      {device}
      {requires_grad}
+    {pin_memory}
  
  
  Example::
@@ -5546,7 +5553,7 @@ Example::
  
  add_docstr(torch.empty,
             r"""
-empty(*sizes, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
+empty(*sizes, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False, pin_memory=False) -> Tensor
  
  Returns a tensor filled with uninitialized data. The shape of the tensor is
  defined by the variable argument :attr:`sizes`.
@@ -5559,6 +5566,7 @@ Args:
      {layout}
      {device}
      {requires_grad}
+    {pin_memory}
  
  Example::
  
diff --git a/torch/csrc/jit/ir.cpp b/torch/csrc/jit/ir.cpp

index b0ef049..eac70ef 100644 (file)
--- a/torch/csrc/jit/ir.cpp
+++ b/torch/csrc/jit/ir.cpp
@@ -812,19 +812,19 @@ bool Node::isNondeterministic() const {
        "aten::poisson(Tensor self, Generator? generator) -> Tensor",
        "aten::rrelu(Tensor self, Scalar lower, Scalar upper, bool training, Generator? generator) -> Tensor",
        "aten::rrelu_with_noise(Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, Generator? generator) -> Tensor",
-      "aten::rand(int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
+      "aten::rand(int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
        "aten::rand_like(Tensor self) -> Tensor",
-      "aten::rand_like(Tensor self, *, int dtype, int layout, Device device) -> Tensor",
-      "aten::randint(int high, int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
-      "aten::randint(int low, int high, int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
+      "aten::rand_like(Tensor self, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+      "aten::randint(int high, int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
+      "aten::randint(int low, int high, int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
        "aten::randint_like(Tensor self, int high) -> Tensor",
        "aten::randint_like(Tensor self, int low, int high) -> Tensor",
-      "aten::randint_like(Tensor self, int high, *, int dtype, int layout, Device device) -> Tensor",
-      "aten::randint_like(Tensor self, int low, int high, *, int dtype, int layout, Device device) -> Tensor",
-      "aten::randn(int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
+      "aten::randint_like(Tensor self, int high, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+      "aten::randint_like(Tensor self, int low, int high, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+      "aten::randn(int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
        "aten::randn_like(Tensor self) -> Tensor",
-      "aten::randn_like(Tensor self, *, int dtype, int layout, Device device) -> Tensor",
-      "aten::randperm(int n, *, int? dtype, int? layout, Device? device) -> Tensor"};
+      "aten::randn_like(Tensor self, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+      "aten::randperm(int n, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor"};
  
    if (nondeterministic_ops.find(this) == nullptr) {
      return false;
diff --git a/torch/csrc/jit/operator.cpp b/torch/csrc/jit/operator.cpp

index 198b3ed..f8fbdc9 100644 (file)
--- a/torch/csrc/jit/operator.cpp
+++ b/torch/csrc/jit/operator.cpp
@@ -387,7 +387,6 @@ void registerOperator(Operator&& op) {
            ". File a bug to add a case for this operator.\n");
      }
    }
-
    getRegistry().registerOperator(std::move(op));
  }
  
@@ -467,8 +466,6 @@ bool Operator::matches(const Node* node) const {
  
    // too many inputs
    if (!schema().is_vararg() && actuals.size() != formals.size()) {
-    // std::cout << "not all inputs used\n" << input_i << " " << inputs_size <<
-    // "\n";
      return false;
    }
  
diff --git a/torch/csrc/jit/passes/shape_analysis.cpp b/torch/csrc/jit/passes/shape_analysis.cpp

index 8ac159b..bd162cf 100644 (file)
--- a/torch/csrc/jit/passes/shape_analysis.cpp
+++ b/torch/csrc/jit/passes/shape_analysis.cpp
@@ -1156,14 +1156,14 @@ class ShapePropagator {
      //   - has ScalarType dtype, Layout layout and Device device arguments
      static const register_formula_for like_factories_with_options{
          {
-            "aten::empty_like(Tensor self, *, int dtype, int layout, Device device) -> Tensor",
-            "aten::full_like(Tensor self, Scalar fill_value, *, int dtype, int layout, Device device) -> Tensor",
-            "aten::ones_like(Tensor self, *, int dtype, int layout, Device device) -> Tensor",
-            "aten::rand_like(Tensor self, *, int dtype, int layout, Device device) -> Tensor",
-            "aten::randint_like(Tensor self, int high, *, int dtype, int layout, Device device) -> Tensor",
-            "aten::randint_like(Tensor self, int low, int high, *, int dtype, int layout, Device device) -> Tensor",
-            "aten::randn_like(Tensor self, *, int dtype, int layout, Device device) -> Tensor",
-            "aten::zeros_like(Tensor self, *, int dtype, int layout, Device device) -> Tensor",
+            "aten::empty_like(Tensor self, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+            "aten::full_like(Tensor self, Scalar fill_value, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+            "aten::ones_like(Tensor self, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+            "aten::rand_like(Tensor self, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+            "aten::randint_like(Tensor self, int high, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+            "aten::randint_like(Tensor self, int low, int high, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+            "aten::randn_like(Tensor self, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
+            "aten::zeros_like(Tensor self, *, int dtype, int layout, Device device, bool pin_memory) -> Tensor",
          },
          [](Node* node) -> type_vec_t {
            if (auto type = node->namedInput(attr::self)
@@ -1185,14 +1185,14 @@ class ShapePropagator {
      //   arguments
      static const register_formula_for size_factories_with_options{
          {
-            "aten::empty(int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
-            "aten::full(int[] size, Scalar fill_value, *, int? dtype, int? layout, Device? device) -> Tensor",
-            "aten::ones(int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
-            "aten::rand(int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
-            "aten::randn(int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
-            "aten::zeros(int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
-            "aten::randint(int high, int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
-            "aten::randint(int low, int high, int[] size, *, int? dtype, int? layout, Device? device) -> Tensor",
+            "aten::empty(int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
+            "aten::full(int[] size, Scalar fill_value, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
+            "aten::ones(int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
+            "aten::rand(int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
+            "aten::randn(int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
+            "aten::zeros(int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
+            "aten::randint(int high, int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
+            "aten::randint(int low, int high, int[] size, *, int? dtype, int? layout, Device? device, bool? pin_memory) -> Tensor",
          },
          [](Node* node) -> type_vec_t {
            if (auto maybe_size = node->get<std::vector<int64_t>>(attr::size)) {
diff --git a/torch/csrc/jit/tracer.cpp b/torch/csrc/jit/tracer.cpp

index 38f7aa4..de64e69 100644 (file)
--- a/torch/csrc/jit/tracer.cpp
+++ b/torch/csrc/jit/tracer.cpp
@@ -408,6 +408,7 @@ void addInputs(Node* n, const char* name, const at::TensorOptions& options) {
    addInputs(n, name, at::typeMetaToScalarType(options.dtype()));
    addInputs(n, name, options.layout());
    addInputs(n, name, options.device());
+  addInputs(n, name, options.pinned_memory());
  }
  
  void addInputs(Node* n, const char* name, at::IntArrayRef value) {
diff --git a/torch/csrc/utils/tensor_new.cpp b/torch/csrc/utils/tensor_new.cpp

index 6339cc2..d7c693f 100644 (file)
--- a/torch/csrc/utils/tensor_new.cpp
+++ b/torch/csrc/utils/tensor_new.cpp
@@ -193,12 +193,15 @@ Tensor internal_new_from_data(
      PyObject* data,
      bool copy_variables,
      bool copy_numpy,
-    bool type_inference) {
+    bool type_inference,
+    bool pin_memory = false) {
+
    if (THPUtils_checkString(data)) {
      throw TypeError("new(): invalid data type '%s'", Py_TYPE(data)->tp_name);
    }
  
    if (THPVariable_Check(data)) {
+    AT_CHECK(!pin_memory, "Can't pin tensor constructed from a variable");
      auto var = reinterpret_cast<THPVariable*>(data)->cdata;
      if (copy_variables) {
        var = var.detach();
@@ -214,6 +217,7 @@ Tensor internal_new_from_data(
  
  #ifdef USE_NUMPY
    if (PyArray_Check(data)) {
+    AT_CHECK(!pin_memory, "Can't pin tensor constructed from numpy");
      auto tensor = autograd::make_variable(tensor_from_numpy(data), /*requires_grad=*/false);
      const auto& scalar_type = type_inference ? tensor.scalar_type() : type.scalarType();
      auto device = device_opt.has_value() ? *device_opt : at::Device(type.device_type());
@@ -225,7 +229,7 @@ Tensor internal_new_from_data(
  
    auto sizes = compute_sizes(data);
    ScalarType scalar_type = type_inference ? infer_scalar_type(data) : type.scalarType();
-  auto tensor = autograd::make_variable(at::empty(sizes, at::initialTensorOptions().dtype(scalar_type)), /*requires_grad=*/false);
+  auto tensor = autograd::make_variable(at::empty(sizes, at::initialTensorOptions().dtype(scalar_type).pinned_memory(pin_memory)), /*requires_grad=*/false);
    recursive_store(
        (char*)tensor.data_ptr(), tensor.sizes(), tensor.strides(), 0,
        scalar_type, tensor.element_size(), data);
@@ -502,10 +506,10 @@ Tensor sparse_coo_tensor_ctor(const Type& default_type, PyObject* args, PyObject
  
  Tensor tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs) {
    static PythonArgParser parser({
-    "tensor(PyObject* data, *, ScalarType dtype=None, Device? device=None, bool requires_grad=False)",
+    "tensor(PyObject* data, *, ScalarType dtype=None, Device? device=None, bool pin_memory=False, bool requires_grad=False)",
    });
  
-  ParsedArgs<4> parsed_args;
+  ParsedArgs<5> parsed_args;
    auto r = parser.parse(args, kwargs, parsed_args);
    if (r.idx == 0) {
      PyObject* data = r.pyobject(0);
@@ -516,14 +520,16 @@ Tensor tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs) {
      }
  
      bool type_inference = r.isNone(1);
-    bool args_requires_grad = r.toBool(3);
+    bool pin_memory = r.toBool(3);
+    bool args_requires_grad = r.toBool(4);
      auto new_tensor = internal_new_from_data(
                 typeWithDefault(r, 1, 2, type),
                 r.deviceOptional(2),
                 data,
                 true,
                 true,
-               type_inference);
+               type_inference,
+               pin_memory);
      new_tensor.detach_(); // ensure new_tensor a leaf node
      new_tensor.set_requires_grad(args_requires_grad);
      return new_tensor;
@@ -576,10 +582,10 @@ Tensor new_tensor(const Type& type, PyObject* args, PyObject* kwargs) {
  
  Tensor new_empty(const Type& type, PyObject* args, PyObject* kwargs) {
    static PythonArgParser parser({
-    "new_empty(IntArrayRef size, *, ScalarType dtype=None, Device? device=None, bool requires_grad=False)",
+    "new_empty(IntArrayRef size, *, ScalarType dtype=None, Device? device=None, bool pin_memory=False, bool requires_grad=False)",
    }, /*traceable=*/true);
  
-  ParsedArgs<4> parsed_args;
+  ParsedArgs<5> parsed_args;
    auto r = parser.parse(args, kwargs, parsed_args);
    if (r.idx == 0) {
      const auto& actual_type = typeWithDefault(r, 1, 2, type);
diff --git a/torch/onnx/symbolic.py b/torch/onnx/symbolic.py

index f31c136..383db27 100644 (file)
--- a/torch/onnx/symbolic.py
+++ b/torch/onnx/symbolic.py
@@ -76,6 +76,8 @@ def _parse_arg(value, desc):
          return int(tval)
      elif desc == 'f':
          return float(tval)
+    elif desc == 'b':
+        return bool(tval)
      elif desc == 't':
          return tval
      elif desc == 'is':
@@ -1279,49 +1281,50 @@ scalar_type_to_onnx = [
  ]
  
  
-@parse_args('v', 'i', 'v', 'v')
-def zeros(g, sizes, dtype, layout, device):
+@parse_args('v', 'i', 'v', 'v', 'b')
+def zeros(g, sizes, dtype, layout, device, pin_memory=False):
      # NOTE: no way to set device and layout in ONNX, so we ignore it
      return g.op("ConstantOfShape", sizes,
-                value_t=torch.tensor([0], dtype=scalar_type_to_pytorch_type[dtype]))
+                value_t=torch.tensor([0], dtype=scalar_type_to_pytorch_type[dtype], pin_memory=pin_memory))
  
  
-@parse_args('v', 'i', 'v', 'v')
-def zeros_like(g, input, dtype, layout, device):
+@parse_args('v', 'i', 'v', 'v', 'b')
+def zeros_like(g, input, dtype, layout, device, pin_memory=False):
      shape = g.op("Shape", input)
      return g.op("ConstantOfShape", shape,
-                value_t=torch.tensor([0], dtype=scalar_type_to_pytorch_type[dtype]))
+                value_t=torch.tensor([0], dtype=scalar_type_to_pytorch_type[dtype], pin_memory=pin_memory))
  
  
-@parse_args('v', 'i', 'v', 'v')
-def ones(g, sizes, dtype, layout, device):
+@parse_args('v', 'i', 'v', 'v', 'b')
+def ones(g, sizes, dtype, layout, device, pin_memory=False):
      return g.op("ConstantOfShape", sizes,
-                value_t=torch.tensor([1], dtype=scalar_type_to_pytorch_type[dtype]))
+                value_t=torch.tensor([1], dtype=scalar_type_to_pytorch_type[dtype], pin_memory=pin_memory))
  
  
-@parse_args('v', 'i', 'v', 'v')
-def ones_like(g, input, dtype, layout, device):
+@parse_args('v', 'i', 'v', 'v', 'b')
+def ones_like(g, input, dtype, layout, device, pin_memory=False):
      shape = g.op("Shape", input)
      return g.op("ConstantOfShape", shape,
-                value_t=torch.tensor([1], dtype=scalar_type_to_pytorch_type[dtype]))
+                value_t=torch.tensor([1], dtype=scalar_type_to_pytorch_type[dtype], pin_memory=pin_memory))
  
  
-def full(g, sizes, value, dtype, layout, device):
+def full(g, sizes, value, dtype, layout, device, pin_memory=False):
      const_value = _maybe_get_const(value, 't')
      if _is_value(const_value):
-        tmp = zeros(sizes, dtype, layout, device)
-        return add(tmp, value, g.op("Constant", value_t=torch.tensor(1)))
+        tmp = zeros(g, sizes, dtype, layout, device, pin_memory)
+        return add(g, tmp, value, g.op("Constant", value_t=torch.tensor(1)))
      else:
          dtype = _get_const(dtype, 'i', 'dtype')
+        pin_memory = _get_const(pin_memory, 'b', 'pin_memory')
          return g.op("ConstantOfShape", sizes,
-                    value_t=torch.tensor([const_value], dtype=scalar_type_to_pytorch_type[dtype]))
+                    value_t=torch.tensor([const_value], dtype=scalar_type_to_pytorch_type[dtype], pin_memory=pin_memory))
  
  
-@parse_args('v', 'f', 'i', 'v', 'v')
-def full_like(g, input, fill_value, dtype, layout, device):
+@parse_args('v', 'f', 'i', 'v', 'v','b')
+def full_like(g, input, fill_value, dtype, layout, device, pin_memory=False):
      shape = g.op("Shape", input)
      return g.op("ConstantOfShape", shape,
-                value_t=torch.tensor([fill_value], dtype=scalar_type_to_pytorch_type[dtype]))
+                value_t=torch.tensor([fill_value], dtype=scalar_type_to_pytorch_type[dtype], pin_memory=pin_memory))
  
  
  @parse_args('v', 'v', 'v', 'v', 'i')
@@ -1384,6 +1387,11 @@ def to(g, self, *args):
          dtype = _get_const(args[0], 'i', 'dtype')
          # Layout and device are ignored
          return g.op("Cast", self, to_i=scalar_type_to_onnx[dtype])
+    elif len(args) == 6:
+        # aten::to(Tensor, ScalarType, Layout, Device, bool, bool, bool) -> Tensor
+        dtype = _get_const(args[0], 'i', 'dtype')
+        # Layout and device are ignored
+        return g.op("Cast", self, to_i=scalar_type_to_onnx[dtype])
      else:
          raise NotImplementedError("Unknown aten::to signature")
author	Vitaly Fedyunin <vitalyf@fb.com>
	Tue, 2 Apr 2019 15:44:27 +0000 (08:44 -0700)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Tue, 2 Apr 2019 15:48:19 +0000 (08:48 -0700)
aten/src/ATen/native/TensorFactories.cpp		patch \| blob \| history
aten/src/ATen/native/cuda/TensorFactories.cu		patch \| blob \| history
aten/src/ATen/native/native_functions.yaml		patch \| blob \| history
aten/src/ATen/native/sparse/SparseTensor.cpp		patch \| blob \| history
aten/src/ATen/native_parse.py		patch \| blob \| history
c10/core/TensorOptions.h		patch \| blob \| history
test/test_torch.py		patch \| blob \| history
tools/autograd/gen_python_functions.py		patch \| blob \| history
tools/autograd/templates/python_torch_functions.cpp		patch \| blob \| history
tools/jit/gen_jit_dispatch.py		patch \| blob \| history
tools/pyi/gen_pyi.py		patch \| blob \| history
torch/_tensor_docs.py		patch \| blob \| history
torch/_torch_docs.py		patch \| blob \| history
torch/csrc/jit/ir.cpp		patch \| blob \| history
torch/csrc/jit/operator.cpp		patch \| blob \| history
torch/csrc/jit/passes/shape_analysis.cpp		patch \| blob \| history
torch/csrc/jit/tracer.cpp		patch \| blob \| history
torch/csrc/utils/tensor_new.cpp		patch \| blob \| history
torch/onnx/symbolic.py		patch \| blob \| history