[XLA][BF16] Add tests to demonstrate the brokenness of BF16 RNG functions in LLVM...
authorYunxing Dai <yunxing@google.com>
Mon, 22 Jan 2018 23:10:08 +0000 (15:10 -0800)
committerTensorFlower Gardener <gardener@tensorflow.org>
Mon, 22 Jan 2018 23:13:40 +0000 (15:13 -0800)
- Added prng tests to demonstrate the brokenness of BF16 RNG functions in LLVM backends.

PiperOrigin-RevId: 182843680

tensorflow/compiler/tests/BUILD
tensorflow/compiler/xla/tests/prng_test.cc

index f7c6cd293a8a4788bd73cc42c5c61e60d4a2c110..314f5506b16e2c28736d9d39aa6c856d50885108 100644 (file)
@@ -403,11 +403,6 @@ tf_xla_py_test(
     disabled_backends = [
         "gpu",
     ],
-    tags = [
-        "manual",
-        "no_oss",
-        "notap",
-    ],
     deps = [
         ":xla_test",
         "//tensorflow/python:framework_for_generated_wrappers",
index 6489eee9f34c6c4426d52e166f7b401d5948742f..d8d7272c3bb54b8de2e808879b852844d1362cd3 100644 (file)
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <limits>
 #include <memory>
 
 #include "tensorflow/compiler/xla/client/computation_builder.h"
@@ -25,6 +26,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/casts.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
@@ -36,36 +38,42 @@ namespace {
 class PrngTest : public ClientLibraryTestBase {
  protected:
   template <typename T>
-  void UniformTest(T a, T b, tensorflow::gtl::ArraySlice<int64> dims);
-
-  template <typename T>
-  void BernoulliTest(float p, tensorflow::gtl::ArraySlice<int64> dims);
+  std::unique_ptr<Literal> UniformTest(T a, T b,
+                                       tensorflow::gtl::ArraySlice<int64> dims,
+                                       int64 seed = 42);
 
   // Computes the χ² statistic of a sample of the discrete uniform distribution
   // of the given range size. `expected_count` is the number of times each
   // possible value is expected to be generated. Thus, the sample size is
   // `range_size * expected_count`.
-  double UniformChiSquared(int32 range_size, int32 expected_count);
+  double UniformChiSquared(int32 range_size, int32 expected_count,
+                           int64 seed = 42);
 };
 
 template <typename T>
-void PrngTest::UniformTest(T a, T b, tensorflow::gtl::ArraySlice<int64> dims) {
+std::unique_ptr<Literal> PrngTest::UniformTest(
+    T a, T b, tensorflow::gtl::ArraySlice<int64> dims, int64 seed) {
   ComputationBuilder builder(client_, TestName());
   builder.RngUniform(
       builder.ConstantR0<T>(a), builder.ConstantR0<T>(b),
       ShapeUtil::MakeShape(primitive_util::NativeToPrimitiveType<T>(), dims));
 
-  SetSeed(42);
+  SetSeed(seed);
   auto actual = ExecuteAndTransferOrDie(&builder, /*arguments=*/{});
   EXPECT_THAT(dims, ::testing::ElementsAreArray(actual->shape().dimensions()));
   actual->EachCell<T>([=](tensorflow::gtl::ArraySlice<int64>, T value) {
     EXPECT_LE(a, value);
     EXPECT_LT(value, b);
   });
+  return actual;
 }
 
 // Uniform random number generation tests
 XLA_TEST_F(PrngTest, ScalarU01) { UniformTest<float>(0, 1, {}); }
+XLA_TEST_F(PrngTest, ScalarU01limits) {
+  UniformTest<float>(std::numeric_limits<float>::min(),
+                     std::numeric_limits<float>::max(), {});
+}
 XLA_TEST_F(PrngTest, ZeroValuesU01) { UniformTest<float>(0, 1, {0}); }
 XLA_TEST_F(PrngTest, TenValuesU01) { UniformTest<float>(0, 1, {10}); }
 XLA_TEST_F(PrngTest, TenValuesU37) { UniformTest<float>(3, 7, {10}); }
@@ -73,6 +81,55 @@ XLA_TEST_F(PrngTest, ZeroValuesR2) { UniformTest<float>(0, 1, {0, 20}); }
 XLA_TEST_F(PrngTest, LargeU01) { UniformTest<float>(0, 1, {0x100, 0x100}); }
 XLA_TEST_F(PrngTest, TwelveValuesU524) { UniformTest<int32>(5, 24, {12}); }
 
+// TODO(b/71543667): Fix Rng ops on LLVM backends.
+XLA_TEST_F(PrngTest, DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL(
+                         DISABLED_ON_CPU(ScalarBF16Tests)))) {
+  for (int64 seed = 0; seed < 100; ++seed) {
+    // Bit pattern 0x80800000 is -2^-126: the negative normal (non-denormal)
+    // value closest to zero. Its low 16 bits are zero, so it is exact in bf16.
+    float low = bit_cast<float>(0x80800000);
+    float high = 0.0f;
+    UniformTest<bfloat16>(static_cast<bfloat16>(low),
+                          static_cast<bfloat16>(high), {}, /*seed=*/seed);
+
+    // One-ulp ranges whose lower bounds have odd and even bf16 mantissas.
+    UniformTest<bfloat16>(static_cast<bfloat16>(32.75),
+                          static_cast<bfloat16>(33), {}, /*seed=*/seed);
+    UniformTest<bfloat16>(static_cast<bfloat16>(32.50),
+                          static_cast<bfloat16>(32.75), {}, /*seed=*/seed);
+    UniformTest<bfloat16>(static_cast<bfloat16>(-33.00),
+                          static_cast<bfloat16>(-32.75), {}, /*seed=*/seed);
+    UniformTest<bfloat16>(static_cast<bfloat16>(-32.75),
+                          static_cast<bfloat16>(-32.50), {}, /*seed=*/seed);
+  }
+}
+
+// TODO(b/71543667): Fix Rng ops on LLVM backends.
+XLA_TEST_F(PrngTest, DISABLED_ON_GPU(DISABLED_ON_CPU(
+                         DISABLED_ON_CPU_PARALLEL(ScalarBF16CountTests)))) {
+  // The range [32.25, 33) contains exactly 3 BF16 values: 32.25, 32.5, and
+  // 32.75. Each should be generated about equally often.
+  bfloat16 low = static_cast<bfloat16>(32.25);
+  bfloat16 high = static_cast<bfloat16>(33);
+  bfloat16 interval = static_cast<bfloat16>(0.25);
+  std::vector<int32> counts(static_cast<int64>((high - low) / interval), 0);
+
+  constexpr int64 count = 100;
+  for (int64 seed = 0; seed < count; ++seed) {
+    auto result = UniformTest<bfloat16>(low, high, {}, /*seed=*/seed);
+    result->Literal::EachCell<bfloat16>(
+        [&](tensorflow::gtl::ArraySlice<int64>, bfloat16 value) {
+          int64 index = static_cast<int64>((value - low) / interval);
+          counts[index]++;
+        });
+  }
+  // Adjacent buckets should hold similar counts: their difference must stay
+  // below 10% of the total sample count. This mostly guards against a 1:2:2
+  // distribution, whose expected difference would be 20% of the total.
+  EXPECT_LT(std::abs(counts[0] - counts[1]), count * 0.1);
+  EXPECT_LT(std::abs(counts[1] - counts[2]), count * 0.1);
+}
+
 namespace {
 template <typename T>
 T Square(T x) {
@@ -80,7 +137,8 @@ T Square(T x) {
 }
 }  // namespace
 
-double PrngTest::UniformChiSquared(int32 range_size, int32 expected_count) {
+double PrngTest::UniformChiSquared(int32 range_size, int32 expected_count,
+                                   int64 seed) {
   int32 sample_size = range_size * expected_count;
 
   ComputationBuilder builder(client_, TestName());
@@ -88,7 +146,7 @@ double PrngTest::UniformChiSquared(int32 range_size, int32 expected_count) {
                      builder.ConstantR0<int32>(range_size),
                      ShapeUtil::MakeShape(S32, {sample_size}));
 
-  SetSeed(42);
+  SetSeed(seed);
   auto actual = ExecuteAndTransferOrDie(&builder, /*arguments=*/{});
   std::vector<int32> counts(range_size, 0);
   actual->EachCell<int32>([&counts](tensorflow::gtl::ArraySlice<int64>,