From fc0b8e60337ae46b90ed5d2f6d1f623f0f8d6581 Mon Sep 17 00:00:00 2001
From: CaoE
Date: Tue, 10 Aug 2021 13:21:22 -0700
Subject: [PATCH] Add BFloat16 support for unique and unique_consecutive on
 CPU (#62559)

Summary:
Add BFloat16 support for unique and unique_consecutive on CPU.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/62559

Reviewed By: anjali411

Differential Revision: D30199482

Pulled By: ngimel

fbshipit-source-id: 6f2d9cc1a528bea7c723139a4f1b14e4b2213601
---
 aten/src/ATen/native/Unique.cpp | 22 +++++++++++++++++-------
 test/test_sort_and_select.py    |  4 +++-
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/aten/src/ATen/native/Unique.cpp b/aten/src/ATen/native/Unique.cpp
index 2ac0403..843e741 100644
--- a/aten/src/ATen/native/Unique.cpp
+++ b/aten/src/ATen/native/Unique.cpp
@@ -8,6 +8,16 @@
 #include <unordered_map>
 #include <unordered_set>
 
+namespace std {
+  template<> struct hash<at::BFloat16>
+  {
+    size_t operator()(const at::BFloat16& v) const noexcept
+    {
+      return std::hash<uint16_t>()(v.x);
+    }
+  };
+}
+
 namespace at {
 namespace native{
 
@@ -25,11 +35,9 @@ std::tuple<Tensor, Tensor, Tensor> unique_cpu_template(
   Tensor output;
   Tensor inverse_indices = at::empty({0}, self.options().dtype(kLong));
   Tensor counts = at::empty({0}, self.options().dtype(kLong));
-
   std::unordered_set<scalar_t> set(input_data, input_data + numel);
   output = at::empty({static_cast<int64_t>(set.size())}, input.options());
   scalar_t *output_data = output.data_ptr<scalar_t>();
-
   if (sorted) {
     std::vector<scalar_t> vec(set.begin(), set.end());
     std::sort(vec.begin(), vec.end());
@@ -236,7 +244,7 @@ std::tuple<Tensor, Tensor, Tensor> _unique_dim_cpu_template(
 
 std::tuple<Tensor, Tensor>
 _unique_cpu(const Tensor& self, const bool sorted, const bool return_inverse) {
-  return AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Bool, self.scalar_type(), "unique", [&] {
+  return AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::BFloat16, at::ScalarType::Bool, self.scalar_type(), "unique", [&] {
     Tensor output, inverse;
     std::tie(output, inverse, std::ignore) = unique_cpu_template<scalar_t>(self, sorted, return_inverse, false);
     return std::make_tuple(output, inverse);
@@ -245,14 +253,14 @@ _unique_cpu(const Tensor& self, const bool sorted, const bool return_inverse) {
 
 std::tuple<Tensor, Tensor, Tensor>
 _unique2_cpu(const Tensor& self, const bool sorted, const bool return_inverse, const bool return_counts) {
-  return AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Bool, self.scalar_type(), "unique", [&] {
+  return AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::BFloat16, at::ScalarType::Bool, self.scalar_type(), "unique", [&] {
     return unique_cpu_template<scalar_t>(self, sorted, return_inverse, return_counts);
   });
 }
 
 std::tuple<Tensor, Tensor, Tensor>
 unique_dim_cpu(const Tensor& self, const int64_t dim, const bool sorted, const bool return_inverse, const bool return_counts) {
-  return AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Bool, self.scalar_type(), "unique_dim", [&] {
+  return AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::BFloat16, at::ScalarType::Bool, self.scalar_type(), "unique_dim", [&] {
     // The current implementation using `dim` always sorts due to unhashable tensors
     return _unique_dim_cpu_template<scalar_t>(self, dim, false, return_inverse, return_counts);
   });
@@ -260,7 +268,7 @@ unique_dim_cpu(const Tensor& self, const int64_t dim, const bool sorted, const b
 
 std::tuple<Tensor, Tensor, Tensor>
 unique_dim_consecutive_cpu(const Tensor& self, const int64_t dim, const bool return_inverse, const bool return_counts) {
-  return AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Bool, self.scalar_type(), "unique_dim", [&] {
+  return AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::BFloat16, at::ScalarType::Bool, self.scalar_type(), "unique_dim", [&] {
     return _unique_dim_cpu_template<scalar_t>(self, dim, true, return_inverse, return_counts);
   });
 }
@@ -268,7 +276,7 @@ unique_dim_consecutive_cpu(const Tensor& self, const int64_t dim, const bool ret
 std::tuple<Tensor, Tensor, Tensor>
 unique_consecutive_cpu(const Tensor& self, const bool return_inverse, const bool return_counts, c10::optional<int64_t> dim) {
   if (!dim.has_value()) {
-    return AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Bool, self.scalar_type(), "unique", [&] {
+    return AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::BFloat16, at::ScalarType::Bool, self.scalar_type(), "unique", [&] {
       return unique_consecutive_cpu_template<scalar_t>(self, return_inverse, return_counts);
     });
   }
diff --git a/test/test_sort_and_select.py b/test/test_sort_and_select.py
index 53d34ec..f716a48 100644
--- a/test/test_sort_and_select.py
+++ b/test/test_sort_and_select.py
@@ -10,7 +10,7 @@ from torch.testing._internal.common_utils import \
     (TEST_WITH_ROCM, TestCase, run_tests, make_tensor, slowTest)
 from torch.testing._internal.common_device_type import \
     (instantiate_device_type_tests, dtypes, onlyOnCPUAndCUDA,
-     skipCUDAIfRocm, onlyCUDA, dtypesIfCUDA, onlyCPU, largeTensorTest)
+     skipCUDAIfRocm, onlyCUDA, dtypesIfCUDA, dtypesIfCPU, onlyCPU, largeTensorTest)
 
 # TODO: remove this
 SIZE = 100
@@ -703,6 +703,7 @@ class TestSortAndSelect(TestCase):
         self.assertEqual(expected_inverse.view(additional_shape), y_inverse)
         self.assertEqual(expected_counts, y_counts)
 
+    @dtypesIfCPU(*set(torch.testing.get_all_dtypes()) - {torch.complex64, torch.complex128})
     @dtypes(*set(torch.testing.get_all_dtypes()) - {torch.bfloat16, torch.complex64, torch.complex128})
     def test_unique(self, device, dtype):
         if dtype is torch.half and self.device_type == 'cpu':
@@ -762,6 +763,7 @@ class TestSortAndSelect(TestCase):
                 count += 1
             self.assertEqual(j, count)
 
+    @dtypesIfCPU(*set(torch.testing.get_all_dtypes()) - {torch.complex64, torch.complex128})
     @dtypes(*set(torch.testing.get_all_dtypes()) - {torch.bfloat16, torch.complex64, torch.complex128})
     def test_unique_consecutive(self, device, dtype):
         if dtype is torch.half and self.device_type == 'cpu':
-- 
2.7.4