From 433db13b48ea4ac56847ac5a664db9abfdb8c09a Mon Sep 17 00:00:00 2001
From: Gregory Chanan <gchanan@fb.com>
Date: Fri, 21 Dec 2018 08:18:37 -0800
Subject: [PATCH] Move torch.logspace to ATen and parallelize on CPU.

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15438

Reviewed By: ezyang

Differential Revision: D13529626

Pulled By: gchanan

fbshipit-source-id: 896e8afee3d6b5a706c4f5815b91ba6bd8af6672
---
 aten/src/ATen/Declarations.cwrap            | 19 -------------
 aten/src/ATen/native/RangeFactories.cpp     | 34 ++++++++++++++++++++++
 aten/src/ATen/native/TensorFactories.cpp    | 16 ++---------
 aten/src/ATen/native/cuda/RangeFactories.cu | 44 +++++++++++++++++++++++++++++
 aten/src/ATen/native/native_functions.yaml  | 11 ++++----
 aten/src/TH/generic/THTensorMath.h          |  2 --
 aten/src/TH/generic/THTensorMoreMath.cpp    | 21 --------------
 aten/src/THC/THCTensorMath.cu               | 14 ---------
 aten/src/THC/generic/THCTensorMath.cu       | 27 ------------------
 aten/src/THC/generic/THCTensorMath.h        |  6 ----
 10 files changed, 85 insertions(+), 109 deletions(-)

diff --git a/aten/src/ATen/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap
index 4975dc4..b47bcfb 100644
--- a/aten/src/ATen/Declarations.cwrap
+++ b/aten/src/ATen/Declarations.cwrap
@@ -1882,25 +1882,6 @@
     - real weight
 ]]
 [[
-  name: _th_logspace
-  cname: logspace
-  types:
-    - Float
-    - Double
-  backends:
-    - CPU
-    - CUDA
-  variants:
-    - function
-  return: argument 0
-  arguments:
-    - arg: THTensor* result
-      output: True
-    - real start
-    - real end
-    - long steps
-]]
-[[
   name: _th_histc
   cname: histc
   types:
diff --git a/aten/src/ATen/native/RangeFactories.cpp b/aten/src/ATen/native/RangeFactories.cpp
index e72a489..0a14574 100644
--- a/aten/src/ATen/native/RangeFactories.cpp
+++ b/aten/src/ATen/native/RangeFactories.cpp
@@ -38,4 +38,38 @@ Tensor& linspace_cpu_out(Tensor& result, Scalar start, Scalar end, int64_t steps
   return result;
 }
 
+Tensor& logspace_cpu_out(Tensor& result, Scalar start, Scalar end, int64_t steps) {
+  AT_CHECK(steps >= 0, "number of steps must be non-negative");
+
+  if (result.numel() != steps) {
+    result.resize_({steps});
+  }
+  Tensor r = result.is_contiguous() ? result : result.contiguous();
+
+  if (steps == 0) {
+    // skip
+  } else if (steps == 1) {
+    r.fill_(std::pow(10.0, start.to<double>()));
+  } else {
+    AT_DISPATCH_FLOATING_TYPES(r.type(), "logspace", [&]() {
+      scalar_t base10 = 10;
+      scalar_t scalar_start = start.to<scalar_t>();
+      scalar_t scalar_end = end.to<scalar_t>();
+      scalar_t *data_ptr = r.data<scalar_t>();
+      scalar_t step = (scalar_end - scalar_start) / static_cast<scalar_t>(steps - 1);
+      at::parallel_for(0, steps, internal::GRAIN_SIZE, [&](int64_t p_begin, int64_t p_end) {
+        scalar_t is = static_cast<scalar_t>(p_begin);
+        for (int64_t i = p_begin; i < p_end; ++i, ++is) {
+          data_ptr[i]= std::pow(base10, scalar_start + step*is);
+        }
+      });
+    });
+  }
+
+  if (!result.is_contiguous()) {
+    result.copy_(r);
+  }
+  return result;
+}
+
 }} // namespace at::native
diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp
index 3dea98a..237f045 100644
--- a/aten/src/ATen/native/TensorFactories.cpp
+++ b/aten/src/ATen/native/TensorFactories.cpp
@@ -250,25 +250,13 @@ Tensor linspace(
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ logspace ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Tensor logspace(Scalar start, Scalar end, const TensorOptions& options) {
-  return native::logspace(start, end, /*steps=*/100, options);
-}
-
 Tensor logspace(
     Scalar start,
     Scalar end,
     int64_t steps,
     const TensorOptions& options) {
-  // Note [Native bindings for legacy TH factory functions]
-  return getFactoryType(options)._th_logspace(start, end, steps);
-}
-
-Tensor& logspace_out(Tensor& result, Scalar start, Scalar end) {
-  return native::logspace_out(result, start, end, /*steps=*/100);
-}
-
-Tensor& logspace_out(Tensor& result, Scalar start, Scalar end, int64_t steps) {
-  return at::legacy::th::_th_logspace_out(result, start, end, steps);
+  Tensor result = at::empty({steps}, options);
+  return at::logspace_out(result, start, end, steps);
 }
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ones ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/aten/src/ATen/native/cuda/RangeFactories.cu b/aten/src/ATen/native/cuda/RangeFactories.cu
index ba6bac9..3a41cb6 100644
--- a/aten/src/ATen/native/cuda/RangeFactories.cu
+++ b/aten/src/ATen/native/cuda/RangeFactories.cu
@@ -21,6 +21,20 @@ struct LinspaceOp {
   const accT start_, step_;
 };
 
+template<typename T, typename accT = T>
+struct LogspaceOp {
+  __host__ __device__ LogspaceOp(accT start, accT step):
+    start_(start), step_(step) { }
+  __device__ __forceinline__ T operator()(ptrdiff_t index) {
+    accT increment = step_ * static_cast<accT>(index);
+    accT base10 = 10;
+    accT value = std::pow(base10, start_ + increment);
+    return static_cast<T>(value);
+  }
+
+  const accT start_, step_;
+};
+
 Tensor& linspace_cuda_out(Tensor& result, Scalar start, Scalar end, int64_t steps) {
   AT_CHECK(steps >= 0, "number of steps must be non-negative");
 
@@ -51,4 +65,34 @@ Tensor& linspace_cuda_out(Tensor& result, Scalar start, Scalar end, int64_t step
   return result;
 }
 
+Tensor& logspace_cuda_out(Tensor& result, Scalar start, Scalar end, int64_t steps) {
+  AT_CHECK(steps >= 0, "number of steps must be non-negative");
+
+  if (result.numel() != steps) {
+    result.resize_({steps});
+  }
+  Tensor r = result.is_contiguous() ? result : result.contiguous();
+
+  if (steps == 0) {
+    // skip
+  } else if (steps == 1) {
+    r.fill_(std::pow(10.0, start.to<double>()));
+  } else {
+    AT_DISPATCH_FLOATING_TYPES(r.type(), "logspace", [&]() {
+      scalar_t scalar_start = start.to<scalar_t>();
+      scalar_t scalar_end = end.to<scalar_t>();
+      scalar_t step = (scalar_end - scalar_start) / static_cast<scalar_t>(steps - 1);
+      LogspaceOp<scalar_t> logspace_method(scalar_start, step);
+      thrust::device_ptr<scalar_t> data_(r.data<scalar_t>());
+      thrust::tabulate(data_, data_ + steps, logspace_method);
+    });
+  }
+
+  if (!result.is_contiguous()) {
+    result.copy_(r);
+  }
+  AT_CUDA_CHECK(cudaGetLastError());
+  return result;
+}
+
 }} // namespace at::native
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 38edbad..2028132 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -1026,13 +1026,12 @@
 - func: logdet(Tensor self) -> Tensor
   variants: function, method
 
-- func: logspace(Scalar start, Scalar end, TensorOptions options={}) -> Tensor
+- func: logspace(Scalar start, Scalar end, int64_t steps=100, TensorOptions options={}) -> Tensor
 
-- func: logspace(Scalar start, Scalar end, int64_t steps, TensorOptions options={}) -> Tensor
-
-- func: logspace_out(Tensor result, Scalar start, Scalar end) -> Tensor
-
-- func: logspace_out(Tensor result, Scalar start, Scalar end, int64_t steps) -> Tensor
+- func: logspace_out(Tensor result, Scalar start, Scalar end, int64_t steps=100) -> Tensor
+  dispatch:
+    CPU: logspace_cpu_out
+    CUDA: logspace_cuda_out
 
 # FIXME: These could be combined as optional but for https://github.com/pytorch/pytorch/issues/6593.
 - func: log_softmax(Tensor self, int64_t dim, ScalarType dtype) -> Tensor
diff --git a/aten/src/TH/generic/THTensorMath.h b/aten/src/TH/generic/THTensorMath.h
index be595bd..c4a5b39 100644
--- a/aten/src/TH/generic/THTensorMath.h
+++ b/aten/src/TH/generic/THTensorMath.h
@@ -196,8 +196,6 @@ TH_API accreal THTensor_(varall)(THTensor *self, int biased);
 TH_API accreal THTensor_(stdall)(THTensor *self, int biased);
 TH_API accreal THTensor_(normall)(THTensor *t, scalar_t value);
 
-TH_API void THTensor_(logspace)(THTensor *r_, scalar_t a, scalar_t b, int64_t n);
-
 TH_API void THTensor_(dirichlet_grad)(THTensor *self, THTensor *x, THTensor *alpha, THTensor *total);
 
 #endif
diff --git a/aten/src/TH/generic/THTensorMoreMath.cpp b/aten/src/TH/generic/THTensorMoreMath.cpp
index d1e2ab2..bae2b4a 100644
--- a/aten/src/TH/generic/THTensorMoreMath.cpp
+++ b/aten/src/TH/generic/THTensorMoreMath.cpp
@@ -2131,27 +2131,6 @@ accreal THTensor_(stdall)(THTensor *tensor, int biased)
   return sqrt(THTensor_(varall)(tensor, biased));
 }
 
-void THTensor_(logspace)(THTensor *r_, scalar_t a, scalar_t b, int64_t n)
-{
-  scalar_t i = 0;
-
-  THArgCheck((n >= 0), 3, "number of points must be non-negative");
-
-  if (THTensor_(nElement)(r_) != n) {
-    THTensor_(resize1d)(r_, n);
-  }
-
-  if (n == 0) {
-  } else if (n == 1) {
-    THTensor_(set1d)(r_, 0, TH_MATH_NAME(pow)(10.0, a));
-  } else {
-    TH_TENSOR_APPLY(scalar_t, r_,
-                    *r__data = TH_MATH_NAME(pow)(10.0, a + i*(b-a)/((scalar_t)(n-1)));
-                    i++;
-      );
-  }
-}
-
 void THTensor_(histc)(THTensor *hist, THTensor *tensor, int64_t nbins, scalar_t minvalue, scalar_t maxvalue)
 {
   scalar_t minval;
diff --git a/aten/src/THC/THCTensorMath.cu b/aten/src/THC/THCTensorMath.cu
index d4808ee..482ff5e 100644
--- a/aten/src/THC/THCTensorMath.cu
+++ b/aten/src/THC/THCTensorMath.cu
@@ -122,19 +122,5 @@ struct LinspaceOp {
   const accT start_, step_;
 };
 
-template<typename T, typename accT = T>
-struct LogspaceOp {
-  __host__ __device__ LogspaceOp(accT start, accT step):
-    start_(start), step_(step) { }
-  __device__ __forceinline__ T operator()(ptrdiff_t index) {
-    accT increment = THCNumerics<accT>::mul(step_, ScalarConvert<ptrdiff_t, accT>::to(index));
-    accT value = THCNumerics<accT>::exp10(THCNumerics<accT>::add(start_, increment));
-    return ScalarConvert<accT, T>::to(value);
-  }
-
-  const accT start_, step_;
-};
-
-
 #include <THC/generic/THCTensorMath.cu>
 #include <THC/THCGenerateAllTypes.h>
diff --git a/aten/src/THC/generic/THCTensorMath.cu b/aten/src/THC/generic/THCTensorMath.cu
index b33bcff..7ae1878 100644
--- a/aten/src/THC/generic/THCTensorMath.cu
+++ b/aten/src/THC/generic/THCTensorMath.cu
@@ -391,33 +391,6 @@ accreal THCTensor_(trace)(THCState *state, THCTensor *src_) {
   return trace;
 }
 
-#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF)
-
-void THCTensor_(logspace)(THCState *state, THCTensor *r_, scalar_t a, scalar_t b, int64_t n) {
-  THCAssertSameGPU(THCTensor_(checkGPU)(state, 1, r_));
-  THArgCheck((n >= 0), 3, "number of points must be non-negative");
-  if (THCTensor_(nElement)(state, r_) != n) THCTensor_(resize1d)(state, r_, n);
-  if (n == 0) {
-    // skip
-  } else if (n == 1) THCTensor_(fill)(state, r_, THCNumerics<scalar_t>::exp10(a));
-  else {
-    THCTensor *r = THCTensor_(isContiguous)(state, r_)
-                       ? r_
-                       : THCTensor_(newContiguous)(state, r_);
-    scalar_t step = THCNumerics<scalar_t>::div(THCNumerics<scalar_t>::sub(b, a),
-                                               ScalarConvert<int64_t, scalar_t>::to(n - 1));
-    LogspaceOp<scalar_t> logspace_method(a, step);
-    thrust::device_ptr<scalar_t> data_(THCTensor_(data)(state, r));
-    thrust::tabulate(data_, data_ + n, logspace_method);
-    if (!THCTensor_(isContiguous)(state, r_)) {
-      THCTensor_(freeCopyTo)(state, r, r_);
-    }
-  }
-  THCudaCheck(cudaGetLastError());
-}
-
-#endif
-
 void THCTensor_(range)(THCState *state, THCTensor *r_, accreal xmin, accreal xmax, accreal step) {
   THCAssertSameGPU(THCTensor_(checkGPU)(state, 1, r_));
   THArgCheck(step > 0 || step < 0, 3, "step must be nonzero");
diff --git a/aten/src/THC/generic/THCTensorMath.h b/aten/src/THC/generic/THCTensorMath.h
index 3288932..6ff8c73 100644
--- a/aten/src/THC/generic/THCTensorMath.h
+++ b/aten/src/THC/generic/THCTensorMath.h
@@ -19,12 +19,6 @@ THC_API void THCTensor_(eye)(THCState *state, THCTensor *self, int64_t n, int64_
 
 THC_API accreal THCTensor_(trace)(THCState *state, THCTensor *self);
 
-#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF)
-
-THC_API void THCTensor_(logspace)(THCState *state, THCTensor *r_, scalar_t a, scalar_t b, int64_t n);
-
-#endif
-
 THC_API void THCTensor_(range)(THCState *state, THCTensor *r_, accreal xmin, accreal xmax, accreal step);
 THC_API void THCTensor_(arange)(THCState *state, THCTensor *r_, accreal xmin, accreal xmax, accreal step);
 
-- 
2.7.4