Fix padding bug of EmbeddingLookup (#3184)
author장지섭/동작제어Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>
Thu, 25 Oct 2018 10:42:59 +0000 (19:42 +0900)
committer오형석/동작제어Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
Thu, 25 Oct 2018 10:42:59 +0000 (19:42 +0900)
This commit fixes a padding bug in EmbeddingLookup.

Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc

index 5f6069c..ae740bb 100644 (file)
@@ -41,8 +41,6 @@ void SimpleEmbeddingLookup::run()
 
   // type of elements of lookups is always integer
   const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer());
-  const auto values_buf = _values->buffer();
-  auto output_buf = _output->buffer();
 
   const auto lookups_info = _lookups->info();
   const auto values_info = _values->info();
@@ -50,29 +48,54 @@ void SimpleEmbeddingLookup::run()
 
   // NOTE The first dimension's position is always at the end of dimensions.
   const auto first_dim_pos = values_info->num_dimensions() - 1;
-  ::arm_compute::Coordinates offset_coord{};
-  for (size_t i = 0; i < first_dim_pos; ++i)
-  {
-    offset_coord.set(i, 0);
-  }
 
   const size_t first_dim = values_info->dimension(first_dim_pos);
-  const size_t copy_bytes = values_info->total_size() / first_dim;
   for (size_t i = 0; i < lookups_info->dimension(0); ++i)
   {
     if (lookups_buf[i] < 0 || lookups_buf[i] >= first_dim)
       throw std::runtime_error("Embedding Lookup: index out of bounds.");
+  }
+
+  // An entry can be copied with one memcpy only if values and output use the same strides; when
+  // they differ the tensors are padded differently and the copy must go line by line instead.
+  auto can_copy_at_once = [&]() -> bool {
+    const auto &values_strides = values_info->strides_in_bytes();
+    const auto &output_strides = output_info->strides_in_bytes();
+
+    for (size_t i = 0; i < first_dim_pos; ++i)
+    {
+      if (values_strides[i] != output_strides[i])
+        return false;
+    }
 
-    size_t idx = lookups_buf[i];
-    offset_coord.set(first_dim_pos, idx);
-    size_t values_offset = values_info->offset_element_in_bytes(offset_coord);
-    offset_coord.set(first_dim_pos, i);
-    size_t output_offset = output_info->offset_element_in_bytes(offset_coord);
+    return true;
+  };
 
-    unsigned char *sink_addr = output_buf + output_offset;
-    unsigned char *source_addr = values_buf + values_offset;
-    memcpy(sink_addr, source_addr, copy_bytes);
+  using ::arm_compute::Window;
+  using ::arm_compute::Iterator;
+
+  size_t copy_bytes;
+  Window window;
+  if (can_copy_at_once())
+  {
+    copy_bytes = values_info->total_size() / first_dim;
+    window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
   }
+  else
+  {
+    copy_bytes = values_info->dimension(0) * values_info->element_size();
+    window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+  }
+
+  Iterator it(_output, window);
+  execute_window_loop(window,
+                      [&](const ::arm_compute::Coordinates &id) {
+                        ::arm_compute::Coordinates values_id = id;
+                        const int idx = id[first_dim_pos];
+                        values_id.set(first_dim_pos, lookups_buf[idx]);
+                        memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+                      },
+                      it);
 
   if (::internal::arm_compute::isGpuMode())
   {