From: Thomas Viehmann Date: Thu, 21 Mar 2019 16:59:20 +0000 (-0700) Subject: move median to ATen (#17637) X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~707 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8356ffa922516f32742740d8ab284527fc6bf05d;p=platform%2Fupstream%2Fpytorch.git move median to ATen (#17637) Summary: This moves median to ATen. - median with dimension reduces to kthvalue - median without dimension (aka medianall) is implemented in parallel to kthvalue because we would not want to reshape (copying for non-contiguous) and then copy again in kthvalue. We can use the helper functions we moved from kthvalue. - `median_cuda` was accidentally already put into ATen in #17544. - The quickselect algorithm without indices for CPU in TH is now obsolete and removed. Pull Request resolved: https://github.com/pytorch/pytorch/pull/17637 Differential Revision: D14346510 Pulled By: ezyang fbshipit-source-id: c07ad144efbd6b4194179bb1c02635862521d8cb --- diff --git a/aten/src/ATen/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap index c9cec39..c67a719 100644 --- a/aten/src/ATen/Declarations.cwrap +++ b/aten/src/ATen/Declarations.cwrap @@ -824,36 +824,6 @@ default: "false" ]] [[ - name: _th_median - variants: - - function - return: argument 0,1 - options: - - cname: medianall - return: real - arguments: - - THTensor* self -]] -[[ - name: _th_median - variants: function - cname: median - return: argument 0,1 - options: - - cname: median - scalar_check: self_->dim() == 0 || (keepdim == false && self_->dim() == 1) - arguments: - - arg: THTensor* values - output: True - - arg: THIndexTensor* indices - output: True - - THTensor* self - - arg: long dim - wrap_dim: self - - arg: bool keepdim - default: "false" -]] -[[ name: _th_sort cname: sort variants: diff --git a/aten/src/ATen/native/LegacyDefinitions.cpp b/aten/src/ATen/native/LegacyDefinitions.cpp index d8b3daa..69aac69 100644 --- a/aten/src/ATen/native/LegacyDefinitions.cpp +++ 
b/aten/src/ATen/native/LegacyDefinitions.cpp @@ -650,10 +650,6 @@ Tensor max(const Tensor & self) { return at::legacy::th::_th_max(self); } -Tensor median(const Tensor & self) { - return at::legacy::th::_th_median(self); -} - std::tuple sort_out(Tensor & values, Tensor & indices, const Tensor & self, int64_t dim, bool descending) { return at::legacy::th::_th_sort_out(values, indices, self, dim, descending); } diff --git a/aten/src/ATen/native/Sorting.cpp b/aten/src/ATen/native/Sorting.cpp index 2a8af89..14132e3 100644 --- a/aten/src/ATen/native/Sorting.cpp +++ b/aten/src/ATen/native/Sorting.cpp @@ -1,8 +1,8 @@ #include +#include #include #include #include -#include namespace at { namespace native { @@ -172,7 +172,8 @@ std::tuple kthvalue_out_cpu( tmp_values, k - 1, [](scalar_t x, scalar_t y) -> bool { - return ((_isnan(x) && !_isnan(y)) || (x > y)); + return ( + (_isnan(x) && !_isnan(y)) || (x > y)); }, [&](int64_t i, int64_t j) { std::swap(tmp_values[i], tmp_values[j]); @@ -200,5 +201,53 @@ std::tuple kthvalue( return std::make_tuple(values, indices); } +std::tuple median_out( + Tensor& values, + Tensor& indices, + const Tensor& self, + int64_t dim, + bool keepdim) { + // note: kthvalue counts from 1..n + int64_t k = self.dim() > 0 ? 
(self.size(dim) + 1) / 2 : 1; + at::kthvalue_out(values, indices, self, k, dim, keepdim); + return std::forward_as_tuple(values, indices); +} + +std::tuple median( + const Tensor& self, + int64_t dim, + bool keepdim) { + Tensor values = at::empty({0}, self.options()); + Tensor indices = at::empty({0}, self.options().dtype(kLong)); + at::median_out(values, indices, self, dim, keepdim); + return std::make_tuple(values, indices); +} + +// this does not reduce to median with dim beause we don't want to copy twice +Tensor median_cpu(const Tensor& self) { + AT_CHECK(self.numel() > 0, "median cannot be called with empty tensor"); + if (self.dim() == 0 && self.numel() == 1) { + return self.clone(); + } + auto tmp_values = self.clone().view(-1); + auto result = at::empty({1}, self.options()); + AT_DISPATCH_ALL_TYPES(self.type(), "median", [&] { + // note, quick_select is 0 based while kthvalue is not + int64_t k = (tmp_values.size(0) - 1) / 2; + auto val_accessor = tmp_values.accessor(); + quick_select_template( + val_accessor, + k, + [](scalar_t x, scalar_t y) -> bool { + return ((_isnan(x) && !_isnan(y)) || (x > y)); + }, + [&](int64_t i, int64_t j) { + std::swap(val_accessor[i], val_accessor[j]); + }); + result.fill_(tmp_values[k]); + }); + return result.view({}); +} + } // namespace native } // namespace at diff --git a/aten/src/ATen/native/TensorCompare.cpp b/aten/src/ATen/native/TensorCompare.cpp index be2fb0a..ec7ddb5 100644 --- a/aten/src/ATen/native/TensorCompare.cpp +++ b/aten/src/ATen/native/TensorCompare.cpp @@ -97,26 +97,6 @@ Tensor _s_where_cpu(const Tensor& condition, const Tensor& self, const Tensor& o return ret; } -std::tuple median(const Tensor& self, int64_t dim, bool keepdim) { - Tensor values = at::empty({0}, self.options()); - Tensor indices = at::empty({0}, self.options().dtype(kLong)); - return at::native::median_out(values, indices, self, dim, keepdim); -} - -std::tuple median_out(Tensor& values, Tensor& indices, - const Tensor& self, int64_t dim, 
bool keepdim) { - AT_CHECK(self.type().backend() == Backend::CPU || self.type().backend() == Backend::CUDA, - "median only supports CPU AND CUDA backend, got: ", toString(self.type().backend())); - dim = maybe_wrap_dim(dim, self.dim()); - if (_dimreduce_return_trivial_no_ident(values, self, dim, keepdim, "median")) { - AT_ASSERT(values.dim() == 0); - indices.resize_({}).fill_(0); - return std::forward_as_tuple(values, indices); - } else { - return at::legacy::th::_th_median_out(values, indices, self, dim, keepdim); - } -} - std::tuple mode(const Tensor& self, int64_t dim, bool keepdim) { Tensor values = at::empty({0}, self.options()); Tensor indices = at::empty({0}, self.options().dtype(kLong)); diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index b5a14f7..6a7a658 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -4000,6 +4000,9 @@ - func: median(Tensor self) -> Tensor matches_jit_signature: True variants: method, function + dispatch: + CPU: median_cpu + CUDA: median_cuda - func: sort(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) 
indices) -> (Tensor(a!), Tensor(b!)) matches_jit_signature: True diff --git a/aten/src/TH/generic/THTensorMath.h b/aten/src/TH/generic/THTensorMath.h index b361a5d..771d915 100644 --- a/aten/src/TH/generic/THTensorMath.h +++ b/aten/src/TH/generic/THTensorMath.h @@ -24,7 +24,6 @@ TH_API accreal THTensor_(dot)(THTensor *t, THTensor *src); TH_API scalar_t THTensor_(minall)(THTensor *t); TH_API scalar_t THTensor_(maxall)(THTensor *t); -TH_API scalar_t THTensor_(medianall)(THTensor *t); TH_API accreal THTensor_(sumall)(THTensor *t); TH_API void THTensor_(neg)(THTensor *self, THTensor *src); @@ -76,7 +75,6 @@ TH_API void THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor * TH_API void THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); TH_API void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, int64_t k, int dimension, int keepdim); TH_API void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); -TH_API void THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); TH_API void THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim); TH_API void THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension); TH_API void THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension); diff --git a/aten/src/TH/generic/THTensorMoreMath.cpp b/aten/src/TH/generic/THTensorMoreMath.cpp index a081cac..0adf1a2 100644 --- a/aten/src/TH/generic/THTensorMoreMath.cpp +++ b/aten/src/TH/generic/THTensorMoreMath.cpp @@ -758,53 +758,6 @@ void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimensio /* Implementation of the Quickselect algorithm, based on Nicolas Devillard's public domain implementation at http://ndevilla.free.fr/median/median/ -Adapted similarly to the above Quicksort algorithm. -This version does not produce indices along with values. 
*/ -static void THTensor_(quickselectnoidx)(scalar_t *arr, int64_t k, int64_t elements, int64_t stride) -{ - int64_t P, L, R, i, j; - scalar_t rswap, piv; - L = 0; - R = elements-1; - - do { - if (R <= L) /* One element only */ - return; - - if (R == L+1) { /* Two elements only */ - if (ARR(L) > ARR(R)) { - ARR_SWAP(L, R); - } - return; - } - - /* Use median of three for pivot choice */ - P=(L+R)>>1; - ARR_SWAP(P, L+1); - if (ARR(L+1) > ARR(R)) { ARR_SWAP(L+1, R); } - if (ARR(L) > ARR(R)) { ARR_SWAP(L, R); } - if (ARR(L+1) > ARR(L)) { ARR_SWAP(L+1, L); } - - i = L+1; - j = R; - piv = ARR(L); - do { - do i++; while(ARR(i) < piv); - do j--; while(ARR(j) > piv); - if (j < i) - break; - ARR_SWAP(i, j); - } while(1); - ARR_SWAP(L, j); - - /* Re-set active partition */ - if (j <= k) L=i; - if (j >= k) R=j-1; - } while(1); -} - -/* Implementation of the Quickselect algorithm, based on Nicolas Devillard's -public domain implementation at http://ndevilla.free.fr/median/median/ Adapted similarly to the above Quicksort algorithm. 
*/ static void THTensor_(quickselect)(scalar_t *arr, int64_t *idx, int64_t k, int64_t elements, int64_t stride) { @@ -855,31 +808,6 @@ static void THTensor_(quickselect)(scalar_t *arr, int64_t *idx, int64_t k, int64 #undef REAL_SWAP #undef BOTH_SWAP -scalar_t THTensor_(medianall)(THTensor *tensor) -{ - THArgCheck(THTensor_nDimensionLegacyAll(tensor) > 0, 1, "tensor must have one dimension"); - - scalar_t theMedian; - ptrdiff_t numel; - int64_t k; - THTensor *temp_; - scalar_t *temp__data; - - numel = THTensor_(nElement)(tensor); - k = (numel-1) >> 1; - - temp_ = THTensor_(newClone)(tensor); - temp__data = temp_->data(); - - THTensor_(quickselectnoidx)(temp__data, k, numel, 1); - - theMedian = temp__data[k]; - - c10::raw::intrusive_ptr::decref(temp_); - - return theMedian; -} - void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) { THTensor *temp_; @@ -994,18 +922,6 @@ void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, } } -void THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) -{ - int64_t t_size_dim, k; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 3, "dimension out of range"); - - t_size_dim = THTensor_sizeLegacyNoScalars(t, dimension); - k = (t_size_dim-1) >> 1; /* take middle or one-before-middle element */ - - THTensor_(kthvalue)(values_, indices_, t, k+1, dimension, keepdim); -} - void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int64_t k, int dim, int dir, int sorted) { int numDims = THTensor_(nDimensionLegacyNoScalars)(t); diff --git a/aten/src/THC/generic/THCTensorMathReduce.cu b/aten/src/THC/generic/THCTensorMathReduce.cu index 7dbfacc..59fc805 100644 --- a/aten/src/THC/generic/THCTensorMathReduce.cu +++ b/aten/src/THC/generic/THCTensorMathReduce.cu @@ -328,74 +328,6 @@ scalar_t THCTensor_(maxall)(THCState *state, THCTensor *self) { return scalar_cast(val); } -scalar_t 
THCTensor_(medianall)(THCState *state, THCTensor *self) { - THCAssertSameGPU(THCTensor_(checkGPU)(state, 1, self)); - - scalar_t val; - ptrdiff_t nelem, k; - - nelem = THCTensor_(nElement)(state, self); - k = (nelem-1) >> 1; - - THCTensor *view = THCTensor_(newView)(state, self, {nelem}); - - THCTensor *sorted = THCTensor_(new)(state); - THCudaLongTensor *indices = THCudaLongTensor_new(state); - - THCTensor_(sort)(state, sorted, indices, view, 0, 0); - - val = THCTensor_(get1d)(state, sorted, k); - - THCTensor_(free)(state, view); - THCTensor_(free)(state, sorted); - THCudaLongTensor_free(state, indices); - - THCudaCheck(cudaGetLastError()); - - return val; -} - -void THCTensor_(median)(THCState *state, - THCTensor *values, - THCudaLongTensor *indices, - THCTensor *self, - int dimension, - int keepdim) { - THCAssertSameGPU(THCTensor_(checkGPU)(state, 1, self)); - - int64_t t_size_dim, k; - - t_size_dim = THCTensor_(size)(state, self, dimension); - - k = (t_size_dim-1) >> 1; - - THCTensor *sorted = THCTensor_(new)(state); - THCudaLongTensor *sorted_indices = THCudaLongTensor_new(state); - - THCTensor_(sort)(state, sorted, sorted_indices, self, dimension, 0); - - THCTensor *newValues = THCTensor_(newNarrow)(state, sorted, dimension, k, 1); - THCudaLongTensor *newIndices = THCudaLongTensor_newNarrow(state, sorted_indices, dimension, k, 1); - - THCTensor_(free)(state, sorted); - THCudaLongTensor_free(state, sorted_indices); - - if (!keepdim) { - THCTensor_(squeeze1d)(state, newValues, newValues, dimension); - THCudaLongTensor_squeeze1d(state, newIndices, newIndices, dimension); - } - - THCTensor_(resizeAs)(state, values, newValues); - THCudaLongTensor_resizeAs(state, indices, newIndices); - THCTensor_(copy)(state, values, newValues); - THCudaLongTensor_copy(state, indices, newIndices); - - THCTensor_(free)(state, newValues); - THCudaLongTensor_free(state, newIndices); - - THCudaCheck(cudaGetLastError()); -} - void THCTensor_(max)(THCState *state, THCTensor *values, 
THCudaLongTensor *indices,