From 4c1081cfddbba33dbd12a28635265bc718697e23 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 15 Mar 2018 21:33:44 -0700
Subject: [PATCH] Implement out of bounds behavior for gather in the HLO
 evaluator

This makes the OOB behavior of gather in the HLO evaluator consistent with
DynamicSlice while we figure out the semantics we want long term.

The HLO->HLO gather expander inherits the wrapping behavior of dynamic-slice
because it lowers the gather ops to loops of dynamic slices.

PiperOrigin-RevId: 189293175
---
 tensorflow/compiler/xla/service/hlo_evaluator.cc   | 11 ++-
 .../compiler/xla/tests/gather_operation_test.cc    | 80 +++++++++++++++++++++-
 2 files changed, 88 insertions(+), 3 deletions(-)
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 91341b5..693004d 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -2771,6 +2771,8 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) {
       gather->gather_dimension_numbers(), /*input_shape=*/operand.shape(),
       /*output_shape=*/shape);
 
+  const Shape& operand_shape = operand.shape();
+
   auto gather_inner_loop_body =
       [&](ArraySlice<int64> output_window_index,
           ArraySlice<int64> input_gather_index,
@@ -2780,9 +2782,16 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) {
         output_window_index_to_input_index(output_window_index));
     for (int i = 0, e = output_index.size(); i < e; i++) {
       output_index[i] = output_gather_index[i] + output_window_index[i];
+      DCHECK_LT(output_index[i], shape.dimensions(i));
     }
     for (int i = 0, e = input_index.size(); i < e; i++) {
-      input_index[i] = input_gather_index[i] + input_window_index[i];
+      // TODO(b/74360564): We should implement whatever out of bounds behavior
+      // we decide for dynamic-slice here as well.
+      input_index[i] = (input_gather_index[i] + input_window_index[i]) %
+                       operand_shape.dimensions(i);
+      if (input_index[i] < 0) {
+        input_index[i] += operand_shape.dimensions(i);
+      }
     }
     TF_RETURN_IF_ERROR(
         result->CopyElementFrom(operand, input_index, output_index));
diff --git a/tensorflow/compiler/xla/tests/gather_operation_test.cc b/tensorflow/compiler/xla/tests/gather_operation_test.cc
index 4e2f19a..0830e9c 100644
--- a/tensorflow/compiler/xla/tests/gather_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/gather_operation_test.cc
@@ -31,12 +31,16 @@ class GatherOperationTest : public HloTestBase {
  protected:
   void RunTest(const string& hlo_text, Literal* operand,
                Literal* gather_indices) {
+    RunTest(hlo_text, {operand, gather_indices});
+  }
+
+  void RunTest(const string& hlo_text,
+               tensorflow::gtl::ArraySlice<Literal*> args) {
     HloModuleConfig config;
     config.set_debug_options(GetDebugOptionsForTest());
     TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
                             tools::Parse(hlo_text, config));
-    EXPECT_TRUE(
-        RunAndCompare(std::move(module), {operand, gather_indices}, nullopt));
+    EXPECT_TRUE(RunAndCompare(std::move(module), args, nullopt));
   }
 };
 
@@ -259,5 +263,77 @@ ENTRY main {
   RunTest(hlo_text, operand.get(), gather_indices.get());
 }
 
+XLA_TEST_F(GatherOperationTest, OutOfBoundsIndex) {
+  // Out of bounds indices must not crash, and the indices in range should
+  // produce the same values across all backends.
+  //
+  // TODO(b/74360564): Once we have a well defined semantics for OOB accesses,
+  // we should get rid of the mask and check that backends produce the same
+  // value for OOB indices too.
+
+  const string hlo_text = R"(
+HloModule BatchDynamicSlice
+
+ENTRY main {
+  operand = s32[3,3]{1,0} parameter(0)
+  indices = s32[6,2]{1,0} parameter(1)
+  gather = s32[6,1,1]{2,1,0} gather(operand, indices),
+      output_window_dims={1,2},
+      elided_window_dims={},
+      gather_dims_to_operand_dims={0,1},
+      index_vector_dim=1,
+      window_bounds={1,1}
+  gather_reshaped = s32[6]{0} reshape(gather)
+  in_bounds_mask = s32[6]{0} parameter(2)
+  ROOT result = s32[6]{0} multiply(gather_reshaped, in_bounds_mask)
+}
+)";
+  std::unique_ptr<Literal> operand =
+      Literal::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+  std::unique_ptr<Literal> gather_indices = Literal::CreateR2<int32>(
+      {{2, 7}, {2, 1}, {1, 1}, {5, 1}, {2147483647, 1}, {1, 2}});
+  std::unique_ptr<Literal> in_bounds_mask =
+      Literal::CreateR1<int32>({0, 1, 1, 0, 0, 1});
+
+  RunTest(hlo_text,
+          {operand.get(), gather_indices.get(), in_bounds_mask.get()});
+}
+
+XLA_TEST_F(GatherOperationTest, NegativeIndex) {
+  // Negative indices must not crash, and the indices in range should produce
+  // the same values across all backends.
+  //
+  // TODO(b/74360564): Once we have a well defined semantics for negative
+  // accesses, we should get rid of the mask and check that backends produce the
+  // same value for negative indices too.
+
+  const string hlo_text = R"(
+HloModule BatchDynamicSlice
+
+ENTRY main {
+  operand = s32[3,3]{1,0} parameter(0)
+  indices = s32[6,2]{1,0} parameter(1)
+  gather = s32[6,1,1]{2,1,0} gather(operand, indices),
+      output_window_dims={1,2},
+      elided_window_dims={},
+      gather_dims_to_operand_dims={0,1},
+      index_vector_dim=1,
+      window_bounds={1,1}
+  gather_reshaped = s32[6]{0} reshape(gather)
+  in_bounds_mask = s32[6]{0} parameter(2)
+  ROOT result = s32[6]{0} multiply(gather_reshaped, in_bounds_mask)
+}
+)";
+  std::unique_ptr<Literal> operand =
+      Literal::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+  std::unique_ptr<Literal> gather_indices = Literal::CreateR2<int32>(
+      {{2, -1}, {2, 1}, {1, 1}, {-500, 1}, {-2147483648, 1}, {1, 2}});
+  std::unique_ptr<Literal> in_bounds_mask =
+      Literal::CreateR1<int32>({0, 1, 1, 0, 0, 1});
+
+  RunTest(hlo_text,
+          {operand.get(), gather_indices.get(), in_bounds_mask.get()});
+}
+
 }  // namespace
 }  // namespace xla
-- 
2.7.4