[graph_node] handle deprecated stl iterator
author hyeonseok lee <hs89.lee@samsung.com>
Mon, 17 Jul 2023 11:42:13 +0000 (20:42 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 21 Aug 2023 06:29:23 +0000 (15:29 +0900)
 - Explicitly provide the parameters, since relying on the default parameters of the STL iterator is deprecated.

Signed-off-by: hyeonseok lee <hs89.lee@samsung.com>
nntrainer/layers/#concat_layer.cpp# [new file with mode: 0644]
nntrainer/models/#neuralnet.cpp# [new file with mode: 0644]
nntrainer/models/#neuralnet.h# [new file with mode: 0644]
nntrainer/models/circle_plus/circle_plus.fbs [new file with mode: 0644]
nntrainer/models/circle_plus/nntrainer.fbs [new file with mode: 0644]
nntrainer/models/circle_plus/test [new file with mode: 0755]
nntrainer/models/circle_plus/test.bin [new file with mode: 0644]
nntrainer/models/circle_plus/test.cpp [new file with mode: 0644]
nntrainer/setProperty [new file with mode: 0644]

diff --git a/nntrainer/layers/#concat_layer.cpp# b/nntrainer/layers/#concat_layer.cpp#
new file mode 100644 (file)
index 0000000..3288448
--- /dev/null
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file   concat_layer.cpp
+ * @date   27 Oct 2020
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug    No known bugs except for NYI items
+ * @brief  This is Concat Layer Class for Neural Network
+ *
+ * @todo merge concat and split layer to a common implementation
+ */
+
+#include <concat_layer.h>
+#include <cstring>
+#include <layer_context.h>
+#include <nntrainer_error.h>
+#include <nntrainer_log.h>
+#include <node_exporter.h>
+#include <tensor_dim.h>
+#include <util_func.h>
+#include <iostream>
+
+namespace nntrainer {
+/**
+ * @brief Construct a ConcatLayer with the leading helper dimension set to 1.
+ */
+ConcatLayer::ConcatLayer() :
+  Layer(),
+  leading_helper_dim(1) {}
+
+/** Index of the single output (also used to pick the first input) */
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+/**
+ * @brief Resolve the concat axis, check the input shapes, publish the output
+ * dimension and precompute the reshape helpers used by forwarding and
+ * calcDerivative.
+ * @param context init context supplying the input dimensions and receiving
+ * the output dimension
+ * @throws std::runtime_error if the inputs differ along a non-concat axis
+ */
+void ConcatLayer::finalize(InitLayerContext &context) {
+  auto const &input_dims = context.getInputDimensions();
+  const TensorDim &first_dim = input_dims[SINGLE_INOUT_IDX];
+  auto &axis_prop = std::get<props::ConcatDimension>(concat_props);
+
+  /** for backward compatibility, the default concat axis is the channel */
+  /// @todo this is a hacky way to force the concat axis to width when the
+  /// channel dimension is taken; the recurrent realizer (return sequence)
+  /// exploits the concat layer but has no control over the stacking axis
+  unsigned int axis = 1;
+  if (!axis_prop.empty()) {
+    axis = axis_prop.get();
+  } else if (first_dim.channel() > 1) {
+    axis = 3;
+  }
+
+  /**
+   * Only the concat axis may differ between inputs. For two inputs a and b
+   * of shape [b,c,h,w]:
+   * axis 1 -> [b,c_a+c_b,h,w], axis 2 -> [b,c,h_a+h_b,w],
+   * axis 3 -> [b,c,h,w_a+w_b]
+   */
+  const auto num_dims = ml::train::TensorDim::getNumDim();
+  unsigned int axis_extent = first_dim.getTensorDim(axis);
+
+  for (unsigned int in = 1; in < input_dims.size(); ++in) {
+    const TensorDim &cur = input_dims[in];
+
+    for (unsigned int d = 0; d < num_dims; ++d) {
+      if (d != axis) {
+        NNTR_THROW_IF(first_dim[d] != cur[d], std::runtime_error)
+          << "Error: concat layer requires same shape from all input layers "
+             "along non-concat dimension";
+      }
+    }
+    axis_extent += cur[axis];
+  }
+
+  TensorDim output_dim = first_dim;
+  output_dim.setTensorDim(axis, axis_extent);
+  context.setOutputDimensions({output_dim});
+
+  /**
+   * The output helper folds the dimensions before the axis into
+   * leading_helper_dim, keeps the axis as height and folds the dimensions
+   * after the axis into width, which makes the row-wise copies simple.
+   */
+  leading_helper_dim = 1;
+  for (unsigned int d = 1; d < axis; ++d) {
+    leading_helper_dim *= output_dim.getTensorDim(d);
+  }
+
+  output_reshape_helper.channel(1);
+  output_reshape_helper.height(output_dim.getTensorDim(axis));
+  output_reshape_helper.width(1);
+  for (unsigned int d = axis + 1; d < num_dims; ++d) {
+    output_reshape_helper.width(output_reshape_helper.width() *
+                                output_dim.getTensorDim(d));
+  }
+
+  /** Every input helper equals the output helper except for its height */
+  input_reshape_helper.resize(input_dims.size());
+  for (unsigned int in = 0; in < input_reshape_helper.size(); ++in) {
+    auto &helper = input_reshape_helper[in];
+    helper = output_reshape_helper;
+    helper.height(input_dims[in].getTensorDim(axis));
+  }
+
+  setBatch(first_dim.batch());
+}
+
+/**
+ * @brief Copy every input, row by row, into its slice of the output.
+ * @param context run context holding the inputs and the output
+ * @param training unused here
+ * @todo avoid the copy by creating the inputs as shared tensors of the
+ * output, so that this layer can run in place as well
+ */
+void ConcatLayer::forwarding(RunLayerContext &context, bool training) {
+  Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
+
+  /** remember the real shape; the helper shape is only used for copying */
+  const TensorDim saved_out_dim = out.getDim();
+  out.reshape(output_reshape_helper);
+
+  unsigned int row_len = output_reshape_helper.width();
+  unsigned int row_offset = 0;
+
+  for (unsigned int in_idx = 0; in_idx < context.getNumInputs(); ++in_idx) {
+    Tensor &in = context.getInput(in_idx);
+    const TensorDim saved_in_dim = in.getDim();
+    auto const &helper = input_reshape_helper[in_idx];
+    in.reshape(helper);
+
+    /** outer loop: batch; inner loop: rows along the concat axis */
+    for (unsigned int b = 0; b < out.batch(); ++b) {
+      for (unsigned int row = 0; row < helper.height(); ++row) {
+        Tensor dst =
+          Tensor::Map(out.getAddress(b, 0, row_offset + row, 0),
+                      row_len * sizeof(float), {1, 1, 1, row_len});
+        const Tensor src =
+          Tensor::Map(in.getAddress(b, 0, row, 0), row_len * sizeof(float),
+                      {1, 1, 1, row_len});
+        dst.copy(src);
+      }
+    }
+
+    in.reshape(saved_in_dim);
+    row_offset += helper.height();
+  }
+
+  out.reshape(saved_out_dim);
+}
+
+/**
+ * @brief Split the incoming derivative, row by row, into the outgoing
+ * derivative of every input (the reverse copy of forwarding).
+ * @param context run context holding the incoming and outgoing derivatives
+ * @todo avoid the copy by creating the inputs as shared tensors of the
+ * output, so that this layer can run in place as well
+ */
+void ConcatLayer::calcDerivative(RunLayerContext &context) {
+  /** taken by value: the reshape below is applied to this local object */
+  Tensor incoming = context.getIncomingDerivative(SINGLE_INOUT_IDX);
+  incoming.reshape(output_reshape_helper);
+
+  unsigned int row_len = output_reshape_helper.width();
+  unsigned int row_offset = 0;
+
+  for (unsigned int in_idx = 0; in_idx < context.getNumInputs(); ++in_idx) {
+    Tensor &outgoing = context.getOutgoingDerivative(in_idx);
+    const TensorDim saved_dim = outgoing.getDim();
+    auto const &helper = input_reshape_helper[in_idx];
+    outgoing.reshape(helper);
+
+    /** outer loop: batch; inner loop: rows along the concat axis */
+    for (unsigned int b = 0; b < incoming.batch(); ++b) {
+      for (unsigned int row = 0; row < helper.height(); ++row) {
+        const Tensor src =
+          Tensor::Map(incoming.getAddress(b, 0, row_offset + row, 0),
+                      row_len * sizeof(float), {1, 1, 1, row_len});
+        Tensor dst =
+          Tensor::Map(outgoing.getAddress(b, 0, row, 0),
+                      row_len * sizeof(float), {1, 1, 1, row_len});
+        dst.copy(src);
+      }
+    }
+
+    outgoing.reshape(saved_dim);
+    row_offset += helper.height();
+  }
+}
+
+/**
+ * @brief Load the concat layer properties from the given property strings.
+ * @param values property strings to apply to concat_props
+ * @throws std::invalid_argument if any value is left unconsumed by
+ * loadProperties
+ */
+void ConcatLayer::setProperty(const std::vector<std::string> &values) {
+  auto remain_props = loadProperties(values, concat_props);
+
+  /** report how many properties were not recognised, not how many were given */
+  NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
+    << "[ConcatLayer] Unknown Layer Properties count " +
+         std::to_string(remain_props.size());
+}
+
+/**
+ * @brief Export the base layer properties, then the concat properties.
+ * @param exporter exporter receiving the results
+ * @param method export method forwarded to both calls
+ */
+void ConcatLayer::exportTo(Exporter &exporter,
+                           const ml::train::ExportMethods &method) const {
+  /** base class first, so the layer-specific props are saved after it */
+  Layer::exportTo(exporter, method);
+
+  exporter.saveResult(concat_props, method, this);
+}
+
+} /* namespace nntrainer */
diff --git a/nntrainer/models/#neuralnet.cpp# b/nntrainer/models/#neuralnet.cpp#
new file mode 100644 (file)
index 0000000..5b230a9
--- /dev/null
@@ -0,0 +1,1350 @@
+/**
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ * @file       neuralnet.cpp
+ * @date       04 December 2019
+ * @brief      This is Neural Network Class
+ * @see                https://github.com/nnstreamer/nntrainer
+ * @author     Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug                No known bugs except for NYI items
+ *
+ */
+
+#include "layer_context.h"
+#include "model_common_properties.h"
+#include <cmath>
+#include <cstring>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+#include <activation_realizer.h>
+#include <common_properties.h>
+#include <databuffer.h>
+#include <flatten_realizer.h>
+#include <ini_interpreter.h>
+#include <ini_wrapper.h>
+#include <input_realizer.h>
+#include <model_loader.h>
+#include <multiout_realizer.h>
+#include <neuralnet.h>
+#include <nntrainer_error.h>
+#include <nntrainer_log.h>
+#include <node_exporter.h>
+#include <optimizer_context.h>
+#include <previous_input_realizer.h>
+#include <profiler.h>
+#include <recurrent_realizer.h>
+#include <remap_realizer.h>
+#include <slice_realizer.h>
+#include <util_func.h>
+
+#ifdef ENABLE_TFLITE_INTERPRETER
+#include <tflite_interpreter.h>
+#endif
+
+/**
+ * @brief Internal enum values for nntrainer to summarize model accuracy & loss
+ */
+#define ML_TRAIN_SUMMARY_MODEL_TRAIN_LOSS 101
+#define ML_TRAIN_SUMMARY_MODEL_VALID_LOSS 102
+#define ML_TRAIN_SUMMARY_MODEL_VALID_ACCURACY 103
+
+namespace nntrainer {
+
+/**
+ * @brief Default constructor: default-constructs all model properties, zeroes
+ * the epoch/iteration/loss counters, clears the three data buffers and the
+ * state flags, then takes a copy of the global AppContext.
+ * @note the two `{}` entries of model_props presumably correspond to the
+ * InputConnection / LabelLayer vectors read elsewhere in this file --
+ * confirm against the header
+ */
+NeuralNetwork::NeuralNetwork() :
+  model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm()),
+  model_flex_props(
+    props::Epochs(), props::TrainingBatchSize(), props::SavePath(),
+    props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(),
+    props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(),
+    props::TensorFormat(), props::ModelTensorDataType()),
+  load_path(std::string()),
+  epoch_idx(0),
+  iter(0),
+  loss(0.0f),
+  data_buffers({nullptr, nullptr, nullptr}),
+  initialized(false),
+  compiled(false),
+  loadedFromConfig(false) {
+  // assigned in the body rather than in the initializer list
+  app_context = AppContext(AppContext::Global());
+  
+}
+
+/**
+ * @brief Construct a network that uses the given application context; every
+ * other member is initialised exactly as in the default constructor.
+ * @param app_context_ application context copied into app_context
+ */
+NeuralNetwork::NeuralNetwork(AppContext app_context_) :
+  model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm()),
+  model_flex_props(
+    props::Epochs(), props::TrainingBatchSize(), props::SavePath(),
+    props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(),
+    props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(),
+    props::TensorFormat(), props::ModelTensorDataType()),
+  load_path(std::string()),
+  epoch_idx(0),
+  iter(0),
+  loss(0.0f),
+  data_buffers({nullptr, nullptr, nullptr}),
+  initialized(false),
+  compiled(false),
+  loadedFromConfig(false),
+  app_context(app_context_) {}
+
+/**
+ * @brief Load the model description from a configuration file.
+ *
+ * Loading happens on a copy of this network; the copy is swapped in only
+ * when both loading steps succeed, so a failure leaves *this untouched.
+ *
+ * @param config path of the configuration to load
+ * @retval ML_ERROR_NONE on success
+ * @retval ML_ERROR_INVALID_PARAMETER if a configuration was already loaded
+ * @retval other status reported by the loader
+ */
+int NeuralNetwork::loadFromConfig(const std::string &config) {
+  if (loadedFromConfig) {
+    ml_loge("cannnot do loadFromConfig twice");
+    return ML_ERROR_INVALID_PARAMETER;
+  }
+
+  ModelLoader loader(app_context);
+  NeuralNetwork staged(*this);
+
+  int status = loader.loadFromContext(staged);
+  if (status == ML_ERROR_NONE) {
+    status = loader.loadFromConfig(config, staged);
+  }
+  if (status != ML_ERROR_NONE) {
+    return status;
+  }
+
+  staged.loadedFromConfig = true;
+  swap(staged, *this);
+
+  return ML_ERROR_NONE;
+}
+
+/**
+ * @brief Get the index of the current epoch.
+ * @return value of epoch_idx
+ */
+unsigned int NeuralNetwork::getCurrentEpoch() {
+#ifdef DEBUG
+  /** logged as unsigned, matching the type handed back to the caller */
+  ml_logd("[NNTrainer] Current epoch: %u",
+          static_cast<unsigned int>(epoch_idx));
+#endif
+  return epoch_idx;
+}
+
+/**
+ * @brief Apply the given properties to the model.
+ *
+ * Values consumed by model_props are applied first; whatever is left over is
+ * handed to setTrainConfig().
+ *
+ * @param values property strings to apply
+ */
+void NeuralNetwork::setProperty(const std::vector<std::string> &values) {
+  setTrainConfig(loadProperties(values, model_props));
+}
+
+/**
+ * @brief Apply the given properties to model_flex_props.
+ * @param values property strings to apply
+ * @throws std::invalid_argument if any value is left unparsed
+ */
+void NeuralNetwork::setTrainConfig(const std::vector<std::string> &values) {
+  auto unparsed = loadProperties(values, model_flex_props);
+
+  NNTR_THROW_IF(!unparsed.empty(), std::invalid_argument)
+    << "Model has unparsed properties, size: " << unparsed.size()
+    << " of first element: " << unparsed.front();
+}
+
+/**
+ * @brief Realize the graph representation, build the NetworkGraph from the
+ * model properties and compile it with the configured loss type.
+ * @return status of model_graph.compile(); `compiled` is set before a
+ * successful return (NN_RETURN_STATUS() presumably returns early on error --
+ * confirm against the macro definition)
+ */
+int NeuralNetwork::compile() {
+  // an unset loss type is passed on as an empty string
+  std::string loss_type = std::get<props::LossType>(model_props).empty()
+                            ? std::string()
+                            : std::get<props::LossType>(model_props);
+
+  auto &input_conn = std::get<std::vector<props::InputConnection>>(model_props);
+  /// @note label layer might need to be treated in the similar way as well
+
+  /// @todo make NetworkGraph compiled at the construction instead of having
+  /// graph.compile(), neuralnetwork have ownership of list of layer nodes,
+  /// which will be passed at compile time.
+
+  // realizers are applied to graph_representation in insertion order
+  std::vector<std::unique_ptr<GraphRealizer>> realizers;
+
+  realizers.emplace_back(new PreviousInputRealizer(
+    std::vector<Connection>(input_conn.begin(), input_conn.end())));
+  realizers.emplace_back(new MultioutRealizer());
+  realizers.emplace_back(new FlattenRealizer());
+  realizers.emplace_back(new ActivationRealizer());
+
+  for (auto &realizer : realizers) {
+    graph_representation = realizer->realize(graph_representation);
+  }
+
+  // settings read from model_flex_props and handed to the NetworkGraph
+  bool memory_swap = std::get<props::MemorySwap>(model_flex_props);
+  const std::string memory_swap_path =
+    std::get<props::MemorySwapPath>(model_flex_props);
+  unsigned int lookahead =
+    std::get<props::MemorySwapLookahead>(model_flex_props);
+
+  const std::string tensor_format =
+    to_string(std::get<props::TensorFormat>(model_flex_props));
+
+  const std::string tensor_type =
+    to_string(std::get<props::ModelTensorDataType>(model_flex_props));
+
+  model_graph = NetworkGraph(memory_swap, memory_swap_path, lookahead,
+                             tensor_format, tensor_type);
+
+  model_graph.setMemoryOptimizations(
+    std::get<props::MemoryOptimization>(model_flex_props));
+  for (auto &node : graph_representation) {
+    // a model-level clip-by-global-norm setting is pushed down to every node
+    if (auto &prop = std::get<props::ClipGradByGlobalNorm>(model_props);
+        !prop.empty()) {
+      node->setProperty({"clip_grad_by_norm=" + to_string(prop)});
+    }
+    model_graph.addLayer(node);
+  }
+
+  int status = model_graph.compile(loss_type);
+  NN_RETURN_STATUS();
+
+  compiled = true;
+
+  return status;
+}
+
+/**
+ * @brief Initialize the compiled graph: resolve input/label connections,
+ * set the batch size, set up the optimizer variables (when an optimizer is
+ * present), allocate the weights and, if load_path is set, load them.
+ * @retval ML_ERROR_NOT_SUPPORTED if already initialized or not yet compiled
+ * @retval status of model_graph.initialize() otherwise
+ */
+int NeuralNetwork::initialize() {
+  int status = ML_ERROR_NONE;
+
+  if (initialized) {
+    ml_loge("Error: Initializing the model again");
+    return ML_ERROR_NOT_SUPPORTED;
+  }
+
+  if (!compiled) {
+    ml_loge("Error: Need to compile first");
+    return ML_ERROR_NOT_SUPPORTED;
+  }
+
+  unsigned int n_layers = (unsigned int)model_graph.size();
+
+  ml_logd("initializing neural network, layer size: %d", n_layers);
+  PROFILE_MEM_ANNOTATE("Initialize");
+
+  auto &input_conn_prop =
+    std::get<std::vector<props::InputConnection>>(model_props);
+  auto &label_layer_prop =
+    std::get<std::vector<props::LabelLayer>>(model_props);
+
+  // convert the property vectors into the types model_graph.initialize takes
+  std::vector<Connection> input_conn(input_conn_prop.begin(),
+                                     input_conn_prop.end());
+  std::vector<std::string> label_layers;
+
+  if (!label_layer_prop.empty()) {
+    label_layers = std::vector<std::string>(label_layer_prop.begin(),
+                                            label_layer_prop.end());
+  }
+
+  status = model_graph.initialize(
+    input_conn,
+    std::vector<Connection>(label_layers.begin(), label_layers.end()));
+  NN_RETURN_STATUS();
+
+  model_graph.setBatchSize(
+    std::get<props::TrainingBatchSize>(model_flex_props));
+
+  // initialize optimizer and related variables
+  /// @todo: initialize should take a mode and check if mode is train but
+  /// optimizer is not given, make it as a hard error
+  if (opt) {
+    /** TODO: update request of optimizer to be of same format as
+     * Layer::requestTensor */
+    opt->finalize();
+    // callback asking the optimizer which variable dims it needs per weight
+    std::function<std::vector<TensorDim>(const TensorDim &)> cb =
+      [this](const TensorDim &dim) {
+        return opt->getOptimizerVariableDim(dim);
+      };
+    model_graph.requestOptimizerVariable(cb, true);
+  }
+
+  // Allocate weights
+  model_graph.allocateWeights();
+
+  // must be set before load(): the BIN branch of load() checks `initialized`
+  initialized = true;
+
+  if (!load_path.empty()) {
+    load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
+  }
+
+  return status;
+}
+
+/**
+ * @brief     Destructor; releases the model through deallocate()
+ */
+NeuralNetwork::~NeuralNetwork() { deallocate(); }
+
+/**
+ * @brief     forward propagation over the model graph
+ * @param[in] training forwarded to every node's forwarding()
+ * @param[in] stop_cb  callback handed to model_graph.forwarding()
+ * @param[in] userdata opaque pointer handed to model_graph.forwarding()
+ * @retval    tensors returned by model_graph.forwarding()
+ */
+sharedConstTensors NeuralNetwork::forwarding(
+  bool training, std::function<bool(void *userdata)> stop_cb, void *userdata) {
+  /**
+   * The per-node operation only needs `this` (for model_graph); stop_cb and
+   * userdata are consumed by the graph itself, so they are not captured.
+   */
+  std::function<void(std::shared_ptr<LayerNode>, bool)> forwarding_op =
+    [this](std::shared_ptr<LayerNode> node, bool training) -> void {
+    PROFILE_MEM_ANNOTATE("Forwarding for layer: " + node->getName());
+
+    auto f = std::get<0>(node->getExecutionOrder());
+    model_graph.flushCacheExcept(f);
+
+    node->forwarding(training);
+  };
+
+  return model_graph.forwarding(training, forwarding_op, stop_cb, userdata);
+}
+
+/**
+ * @brief     forward propagation with explicit inputs and labels
+ * @param[in] input    input tensors, must not be empty
+ * @param[in] label    label tensors, may be empty
+ * @param[in] training forwarded to the graph-level forwarding()
+ * @retval    tensors returned by the graph-level forwarding()
+ * @throws    std::invalid_argument if no input tensor is given
+ * @throws    std::logic_error if the batch size of the first input (or of
+ *            the first label, when labels are given) differs from the model
+ */
+sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input,
+                                             sharedConstTensors label,
+                                             bool training) {
+  /** input[0] is read below, so an empty input must be rejected first */
+  NNTR_THROW_IF(input.empty(), std::invalid_argument)
+    << "Error: no input tensor is given for forwarding.";
+
+  auto current_batch = model_graph.getBatchSize();
+  /** label may be empty here, so the message must not read label[0] blindly */
+  NNTR_THROW_IF(input[0]->batch() != current_batch ||
+                  (!label.empty() && label[0]->batch() != current_batch),
+                std::logic_error)
+    << "Error: mismatch in batchsize for data and model."
+    << " input_batch: " << input[0]->batch() << " label_batch: "
+    << (label.empty() ? std::string("none")
+                      : std::to_string(label[0]->batch()))
+    << " target_batch: " << current_batch;
+
+  model_graph.setInputsLabels(input, label);
+
+  return forwarding(training);
+}
+
+/**
+ * @brief     back propagation
+ *            Builds the per-node backward operation and the gradient
+ *            clipping operation, then hands both to model_graph.backwarding()
+ * @param[in] iteration iteration number, used to query the learning rate
+ * @param[in] stop_cb   callback checked before calcDerivative of each node
+ *                      and also forwarded to the graph
+ * @param[in] userdata  opaque pointer passed to stop_cb
+ * @note      the optimizer is only null-checked in DEBUG builds, while both
+ *            lambdas dereference it
+ */
+void NeuralNetwork::backwarding(int iteration,
+                                std::function<bool(void *userdata)> stop_cb,
+                                void *userdata) {
+
+#ifdef DEBUG
+  NNTR_THROW_IF(!opt, std::invalid_argument) << "optimizer is null!";
+#endif
+
+  std::function<void(std::shared_ptr<LayerNode>, int)> backwarding_op =
+    [this, stop_cb, userdata](std::shared_ptr<LayerNode> node,
+                              int iteration) -> void {
+    /**
+     * Do not change this order:
+     * 1. calcGradient
+     * 2. calcDerivative
+     * 3. applyGradient
+     * 4. gradientClippingOnLastAccess
+     */
+
+    model_graph.flushCacheExcept(std::get<1>(node->getExecutionOrder()));
+    PROFILE_MEM_ANNOTATE("CalcGradient: " + node->getName());
+
+    // stays true: the dynamic check that could clear it is commented out below
+    bool apply_gradient = true;
+    if (node->getTrainable()) {
+      /** If gradient optimization mode, then calculate gradient first */
+      if (dynamic_training_opt.isGradientMode())
+        node->calcGradient();
+
+      /**
+       * If optimization off, or gradient must be applied, then this will be
+       * true
+       * @todo This apply gradient should be passed to the each weight and later
+       * be queried when updating gradient at once. (after moving apply_gradient
+       * out of this function)
+       *
+       */
+      // auto &layer = node->getObject();
+      // apply_gradient = dynamic_training_opt.checkIfApply(
+      //   layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0],
+      //   opt, iteration);
+
+      /** If gradient must be applied and its not gradient mode, calculate
+       * gradient
+       */
+      if (!dynamic_training_opt.isGradientMode() && apply_gradient)
+        node->calcGradient();
+    }
+
+    model_graph.flushCacheExcept(std::get<2>(node->getExecutionOrder()));
+    PROFILE_MEM_ANNOTATE("CalcDerivative: " + node->getName());
+
+    // a stop request skips both calcDerivative and applyGradient of this node
+    if (stop_cb(userdata)) {
+      return;
+    }
+
+    if (node->needsCalcDerivative())
+      node->calcDerivative();
+
+    model_graph.flushCacheExcept(std::get<3>(node->getExecutionOrder()));
+    PROFILE_MEM_ANNOTATE("ApplyGradient: " + node->getName());
+
+    if (apply_gradient) {
+      /// Apply gradient only at the end of the last shared weight access
+      model_graph.applyGradients(
+        node.get(), [iteration, opt_ = opt.get()](Weight &w) {
+          w.calcRegularizationGradient();
+          w.calcWeightDecayGradient();
+          RunOptimizerContext opt_context(&w, iteration,
+                                          opt_->getLearningRate(iteration));
+          opt_->applyGradient(opt_context);
+        });
+    }
+  };
+
+  // same per-weight update as above, handed to the graph as the clipping op
+  std::function<void(Weight &, int)> apply_grad_clip_op =
+    [opt_ = opt.get()](Weight &w, int iteration) -> void {
+    w.calcRegularizationGradient();
+    w.calcWeightDecayGradient();
+    RunOptimizerContext opt_context(&w, iteration,
+                                    opt_->getLearningRate(iteration));
+    opt_->applyGradient(opt_context);
+  };
+
+  model_graph.backwarding(iteration, backwarding_op, apply_grad_clip_op,
+                          stop_cb, userdata);
+}
+
+/**
+ * @brief Save the model to file_path in the requested format.
+ * @param file_path destination path
+ * @param format BIN writes the nodes (plus optimizer variables for adam),
+ * epoch_idx and iter; INI delegates to saveModelIni(); INI_WITH_BIN writes an
+ * ini file and a sibling ".bin" file
+ * @throws std::runtime_error if the model is not initialized
+ * @throws nntrainer::exception::not_supported for any other format
+ */
+void NeuralNetwork::save(const std::string &file_path,
+                         ml::train::ModelFormat format) {
+  NNTR_THROW_IF(!initialized, std::runtime_error)
+    << "Cannot save model if not initialized yet, path: " << file_path
+    << " format: " << static_cast<unsigned>(format);
+
+  /// @todo this switch case should be delegating the function call only. It's
+  /// not delegating for now as required logics are manageable for now.
+  switch (format) {
+  case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
+    auto model_file = checkedOpenStream<std::ofstream>(
+      file_path, std::ios::out | std::ios::binary | std::ios::trunc);
+    // the loop variable `iter` shadows the member `iter` inside the loop only
+    for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
+      (*iter)->save(model_file);
+    }
+    if (opt && istrequal(opt->getType(), "adam")) {
+      // 4-byte tag, compared against "adam" again by load()
+      std::string adam = "adam";
+      model_file.write(adam.c_str(), 4);
+      for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
+           iter++) {
+        (*iter)->save(model_file, true);
+      }
+    }
+
+    // here `iter` is the member again (the loop variables are out of scope)
+    model_file.write((char *)&epoch_idx, sizeof(epoch_idx));
+    model_file.write((char *)&iter, sizeof(iter));
+
+    model_file.close();
+    break;
+  }
+  case ml::train::ModelFormat::MODEL_FORMAT_INI:
+    saveModelIni(file_path);
+    break;
+
+  case ml::train::ModelFormat::MODEL_FORMAT_INI_WITH_BIN: {
+    // SavePath temporarily points at the .bin file while the ini is written
+    auto old_save_path = std::get<props::SavePath>(model_flex_props);
+    auto bin_file_name =
+      file_path.substr(0, file_path.find_last_of('.')) + ".bin";
+
+    std::get<props::SavePath>(model_flex_props).set(bin_file_name);
+    save(file_path, ml::train::ModelFormat::MODEL_FORMAT_INI);
+    save(bin_file_name, ml::train::ModelFormat::MODEL_FORMAT_BIN);
+    // NOTE(review): not restored if either save() above throws
+    std::get<props::SavePath>(model_flex_props) = old_save_path;
+    break;
+  }
+  default:
+    throw nntrainer::exception::not_supported(
+      "saving with given format is not supported yet");
+  }
+}
+
+/**
+ * @brief Load the model from file_path in the requested format.
+ * @param file_path source path
+ * @param format BIN reads the nodes and, best effort, the optimizer
+ * variables, epoch_idx and iter; INI and INI_WITH_BIN load the configuration
+ * (INI_WITH_BIN also records the save path as load_path); FLATBUFFER does
+ * nothing here
+ * @throws std::runtime_error for BIN if the model is not initialized
+ * @throws nntrainer::exception::not_supported for any other format
+ */
+void NeuralNetwork::load(const std::string &file_path,
+                         ml::train::ModelFormat format) {
+  /// @todo this switch case should be delegating the function call only. It's
+  /// not delegating for now as required logics are manageable for now.
+  switch (format) {
+  case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
+    NNTR_THROW_IF(!initialized, std::runtime_error)
+      << "Cannot load if not initialized yet, path: " << file_path
+      << " format: " << static_cast<unsigned>(format);
+
+    auto model_file = checkedOpenStream<std::ifstream>(
+      file_path, std::ios::in | std::ios::binary);
+    for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
+      (*iter)->read(model_file);
+    }
+    try {
+      /// this is assuming that the failure is allowed at the end of the file
+      /// read. so, after this line, additional read shouldn't be called
+      if (opt && istrequal(opt->getType(), "adam")) {
+        // 4-byte tag written by save() ahead of the optimizer variables
+        std::string opt_type;
+        opt_type.resize(4);
+        model_file.read((char *)&opt_type[0], 4);
+        if (istrequal(opt_type, "adam")) {
+          for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
+               iter++) {
+            (*iter)->read(model_file, true);
+          }
+        }
+      }
+
+      // `iter` below is the member; the loop variables above are out of scope
+      checkedRead(model_file, (char *)&epoch_idx, sizeof(epoch_idx),
+                  "[NeuralNetwork::readModel] failed to read epoch_idx");
+      checkedRead(model_file, (char *)&iter, sizeof(iter),
+                  "[NeuralNetwork::readModel] failed to read iteration");
+    } catch (...) {
+      // deliberate best effort: any failure in the block above is only reported
+      std::cerr << "failed to read additional data like optimizer variable, "
+                   "iteration, proceeding with default\n";
+    }
+
+    ml_logi("read modelfile: %s", file_path.c_str());
+    break;
+  }
+  case ml::train::ModelFormat::MODEL_FORMAT_INI_WITH_BIN: {
+    int ret = loadFromConfig(file_path);
+    throw_status(ret);
+    auto &save_path = std::get<props::SavePath>(model_flex_props);
+    if (!save_path.empty()) {
+      // the stream is discarded; the call only checks the file can be opened
+      checkedOpenStream<std::ifstream>(save_path,
+                                       std::ios::in | std::ios::binary);
+      // initialize() loads the weights from load_path when it is non-empty
+      load_path = save_path;
+    }
+    break;
+  }
+  case ml::train::ModelFormat::MODEL_FORMAT_INI: {
+    int ret = loadFromConfig(file_path);
+    throw_status(ret);
+    break;
+  }
+  case ml::train::ModelFormat::MODEL_FORMAT_FLATBUFFER: {
+    // accepted, but nothing is loaded in this function
+    break;
+  }
+  default:
+    throw nntrainer::exception::not_supported(
+      "loading with given format is not supported yet");
+  }
+}
+
+/**
+ * @brief Recompute the model loss as the sum of the loss of every node.
+ * @return the accumulated loss, which is also stored in the `loss` member
+ */
+float NeuralNetwork::getLoss() {
+  loss = 0.0f;
+
+  auto node_it = model_graph.cbegin();
+  while (node_it != model_graph.cend()) {
+    loss += (*node_it)->getLoss();
+    node_it++;
+  }
+
+  return loss;
+}
+
+/**
+ * @brief Overwrite the stored loss value.
+ * @param l new loss value
+ */
+void NeuralNetwork::setLoss(float l) {
+  loss = l;
+}
+
+/**
+ * @brief Copy properties, loss, optimizer and graph from another network.
+ * @param from network to copy from
+ * @return reference to this network (unchanged when from is this network)
+ */
+NeuralNetwork &NeuralNetwork::copy(NeuralNetwork &from) {
+  /** self copy is a no-op */
+  if (this == &from) {
+    return *this;
+  }
+
+  model_props = from.model_props;
+  model_flex_props = from.model_flex_props;
+  loss = from.loss;
+  opt = from.opt;
+
+  model_graph.copy(from.model_graph);
+
+  return *this;
+}
+
+/**
+ * @brief Write the model as an ini file: a "model" section, optional
+ * optimizer and dataset sections, followed by the serialized graph.
+ * @param file_path destination path; must not exist yet
+ * @throws std::invalid_argument if file_path already exists
+ */
+void NeuralNetwork::saveModelIni(const std::string &file_path) {
+  NNTR_THROW_IF(isFileExist(file_path), std::invalid_argument)
+    << "There is already a file, overriding to the exisiting file is not "
+       "permitted, path: "
+    << file_path;
+
+  std::vector<IniSection> sections;
+
+  IniSection model_section = IniSection::FromExportable("model", *this);
+  model_section.setEntry("type", "NeuralNetwork");
+  sections.push_back(model_section);
+
+  // appends a section for obj_ptr only when pred(obj_ptr) holds
+  auto add_section_if_any = [&sections](const std::string &section_name,
+                                        auto obj_ptr, auto pred) {
+    if (pred(obj_ptr)) {
+      IniSection s = IniSection::FromExportable(section_name, *obj_ptr);
+      s.setEntry("type", obj_ptr->getType());
+      sections.push_back(s);
+    }
+  };
+
+  // the optimizer section is written only when an optimizer is set
+  add_section_if_any("optimizer", opt,
+                     [](const auto &obj) { return static_cast<bool>(obj); });
+
+  auto &[train_buffer, valid_buffer, test_buffer] = data_buffers;
+  // a buffer is written only if it is set and serializable as string vector
+  auto data_buffer_valid = [](const auto &buffer) {
+    return buffer && buffer->isSerializable(
+                       ml::train::ExportMethods::METHOD_STRINGVECTOR);
+  };
+
+  add_section_if_any("train_set", train_buffer, data_buffer_valid);
+  add_section_if_any("valid_set", valid_buffer, data_buffer_valid);
+  add_section_if_any("test_set", test_buffer, data_buffer_valid);
+
+  IniWrapper wrapper("model_saver", sections);
+  wrapper.save_ini(file_path);
+
+  // the graph is serialized to the same path after the sections are saved
+  IniGraphInterpreter interpreter;
+  interpreter.serialize(graph_representation, file_path);
+}
+
+/**
+ * @brief Check that the given tensors match the model's input dimensions.
+ * @param X tensors to validate
+ * @retval true if the count and every dimension match
+ * @retval false otherwise (the mismatch is logged)
+ */
+bool NeuralNetwork::validateInput(sharedConstTensors X) {
+  auto required_dims = getInputDimension();
+
+  if (X.size() != required_dims.size()) {
+    ml_loge("Error: provided number of inputs %d, required %d",
+            static_cast<int>(X.size()),
+            static_cast<int>(required_dims.size()));
+    return false;
+  }
+
+  for (unsigned int i = 0; i < required_dims.size(); ++i) {
+    if (required_dims[i] != X[i]->getDim()) {
+      /** render both dimensions for the log */
+      std::stringstream provided;
+      std::stringstream required;
+      provided << X[i]->getDim();
+      required << required_dims[i];
+
+      ml_loge("Error: provided input shape does not match required shape");
+      ml_loge("Provided tensor summary : %s", provided.str().c_str());
+      ml_loge("Required tensor summary : %s", required.str().c_str());
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * @brief Run inference without labels.
+ * @param X input tensors
+ * @param free_mem forwarded to the labelled overload
+ * @return output tensors of the labelled overload called with empty labels
+ */
+sharedConstTensors NeuralNetwork::inference(sharedConstTensors X,
+                                            bool free_mem) {
+  return inference(X, sharedConstTensors(), free_mem);
+}
+
+/**
+ * @brief Run inference with optional labels.
+ *
+ * The graph batch size is adjusted to the batch of the first input before
+ * the inputs are validated.
+ *
+ * @param X input tensors, must not be empty
+ * @param label label tensors, may be empty
+ * @param free_mem if true, the tensors allocated for the run are released
+ * afterwards (weights are kept)
+ * @return output tensors of the forward pass
+ * @throws std::invalid_argument if X is empty or fails validateInput()
+ */
+sharedConstTensors NeuralNetwork::inference(sharedConstTensors X,
+                                            sharedConstTensors label,
+                                            bool free_mem) {
+  /** X[0] is read right below, so an empty X must be rejected first */
+  if (X.empty())
+    throw std::invalid_argument("Error: no input tensor is given.");
+
+  if (model_graph.getBatchSize() != X[0]->batch()) {
+    model_graph.setBatchSize(X[0]->batch());
+  }
+
+  sharedConstTensors out;
+  if (!validateInput(X))
+    throw std::invalid_argument("Input validation failed.");
+
+  allocate(ExecutionMode::INFERENCE);
+
+  int nn_foward;
+  PROFILE_TIME_REGISTER_EVENT(nn_foward, "nn_forward");
+  PROFILE_TIME_START(nn_foward);
+  out = forwarding(X, label, false);
+  PROFILE_TIME_END(nn_foward);
+
+  if (free_mem) {
+    /**
+     * Free the memory needed for training before exiting.
+     * Note that this does not free the weights for the model.
+     * Weights of the model will be freed when the model is destroyed.
+     */
+    model_graph.deallocateTensors(false);
+  }
+
+  /** Clear the set inputs and labels */
+  model_graph.setInputsLabels({}, {});
+
+  return out;
+}
+
+/**
+ * @brief Run inference on raw float buffers.
+ * @param batch_size batch size applied to every input and label dimension
+ * @param input one buffer per model input dimension
+ * @param label optional buffers, one per model output dimension
+ * @return data pointer of every output tensor
+ * @throws std::invalid_argument if fewer buffers are given than the model
+ * has input dimensions (or output dimensions, for a non-empty label)
+ * @note the run is made with free_mem = false, so the graph tensors are not
+ * released by this call
+ */
+std::vector<float *>
+NeuralNetwork::inference(unsigned int batch_size,
+                         const std::vector<float *> &input,
+                         const std::vector<float *> &label) {
+  sharedConstTensors input_tensors, output_tensors;
+  auto in_dim = getInputDimension();
+
+  /** input[idx] is indexed up to in_dim.size() below */
+  NNTR_THROW_IF(input.size() < in_dim.size(), std::invalid_argument)
+    << "Error: provided number of inputs " << input.size() << ", required "
+    << in_dim.size();
+
+  input_tensors.reserve(input.size());
+  for (unsigned int idx = 0; idx < in_dim.size(); idx++) {
+    in_dim[idx].batch(batch_size);
+    input_tensors.emplace_back(MAKE_SHARED_TENSOR(Tensor::Map(
+      input[idx], in_dim[idx].getDataLen() * sizeof(float), in_dim[idx], 0)));
+  }
+
+  if (!label.empty()) {
+    sharedConstTensors label_tensors;
+    auto label_dim = getOutputDimension();
+
+    /** label[idx] is indexed up to label_dim.size() below */
+    NNTR_THROW_IF(label.size() < label_dim.size(), std::invalid_argument)
+      << "Error: provided number of labels " << label.size() << ", required "
+      << label_dim.size();
+
+    label_tensors.reserve(label.size());
+    for (unsigned int idx = 0; idx < label_dim.size(); idx++) {
+      label_dim[idx].batch(batch_size);
+      label_tensors.emplace_back(MAKE_SHARED_TENSOR(
+        Tensor::Map(label[idx], label_dim[idx].getDataLen() * sizeof(float),
+                    label_dim[idx], 0)));
+    }
+    output_tensors = inference(input_tensors, label_tensors, false);
+  } else {
+    output_tensors = inference(input_tensors, false);
+  }
+
+  std::vector<float *> output;
+  output.reserve(output_tensors.size());
+
+  for (auto &out : output_tensors) {
+    /** NOTE(review): presumably the copy shares its data with *out -- confirm */
+    auto out_t = *out.get();
+    output.push_back(out_t.getData());
+  }
+
+  return output;
+}
+
+/**
+ * @brief     Register a dataset for the given mode
+ * @param[in] mode dataset mode the dataset is registered for
+ * @param[in] dataset dataset, cast to DataBuffer before being stored
+ * @retval    status code of setDataBuffer(), returned unchanged
+ */
+int NeuralNetwork::setDataset(const DatasetModeType &mode,
+                              std::shared_ptr<ml::train::Dataset> dataset) {
+  auto data_buffer = std::static_pointer_cast<DataBuffer>(dataset);
+  return setDataBuffer(mode, data_buffer);
+}
+
+/**
+ * @brief     (Re)allocate the tensors of the model graph for the given mode
+ * @param[in] mode execution mode passed to the graph's tensor allocation
+ * @retval    #ML_ERROR_NONE always
+ */
+int NeuralNetwork::allocate(ExecutionMode mode) {
+  /// deallocate first (default argument), then allocate for `mode`
+  model_graph.deallocateTensors();
+  model_graph.allocateTensors(mode);
+
+  return ML_ERROR_NONE;
+}
+
+/**
+ * @brief     Deallocate the tensors of the model graph
+ * @note      Calls deallocateTensors(true), unlike train()/inference() which
+ * pass false.
+ * @retval    #ML_ERROR_NONE always
+ */
+int NeuralNetwork::deallocate() {
+  model_graph.deallocateTensors(true);
+
+  return ML_ERROR_NONE;
+}
+
+/**
+ * @brief     Configure, allocate and run training
+ * @param[in] values training properties handed to setTrainConfig()
+ * @param[in] stop_cb callback forwarded to train_run() together with
+ * @a stop_user_data
+ * @param[in] stop_user_data opaque pointer forwarded to train_run()
+ * @param[in] epoch_complete_cb callback forwarded to train_run() together
+ * with @a epoch_user_data
+ * @param[in] epoch_user_data opaque pointer forwarded to train_run()
+ * @retval    #ML_ERROR_NONE Successful.
+ * @retval    #ML_ERROR_INVALID_PARAMETER the train data buffer or the
+ * optimizer is missing
+ */
+int NeuralNetwork::train(const std::vector<std::string> &values,
+                         std::function<bool(void *)> stop_cb,
+                         void *stop_user_data,
+                         std::function<void(void *)> epoch_complete_cb,
+                         void *epoch_user_data) {
+  int status = ML_ERROR_NONE;
+
+  /// both a train data buffer and an optimizer are required before any
+  /// configuration or allocation happens
+  if (data_buffers[static_cast<int>(DatasetModeType::MODE_TRAIN)] == nullptr) {
+    ml_loge("Cannot initialize the model without the train data buffer.");
+    return ML_ERROR_INVALID_PARAMETER;
+  }
+
+  if (!opt) {
+    ml_loge("Cannot train network without optimizer.");
+    return ML_ERROR_INVALID_PARAMETER;
+  }
+
+  setTrainConfig(values);
+
+  /** set batch size just before training */
+  model_graph.setBatchSize(
+    std::get<props::TrainingBatchSize>(model_flex_props));
+
+  status = allocate(ExecutionMode::TRAIN);
+  NN_RETURN_STATUS();
+
+  /// NOTE(review): NN_RETURN_STATUS presumably returns early when status is
+  /// not ML_ERROR_NONE; if so the deallocation below is skipped when
+  /// train_run() fails - confirm this is intended.
+  status =
+    train_run(stop_cb, stop_user_data, epoch_complete_cb, epoch_user_data);
+  NN_RETURN_STATUS();
+
+  /**
+   * Free the memory needed for training before exiting.
+   * Note that this does not free the weights for the model.
+   * Weights of the model will be freed when the model is destroyed.
+   */
+  model_graph.deallocateTensors(false);
+  return status;
+}
+
+/**
+ * @brief     Run NeuralNetwork train with callback function by user
+ * @param[in] stop_cb callback polled with @a stop_user_data; the epoch loop
+ * breaks when it returns true. It is also forwarded to forwarding() and
+ * backwarding().
+ * @param[in] stop_user_data opaque pointer handed to stop_cb
+ * @param[in] epoch_complete_cb callback invoked with @a epoch_user_data at
+ * the end of every epoch of the loop
+ * @param[in] epoch_user_data opaque pointer handed to epoch_complete_cb
+ * @retval    #ML_ERROR_NONE on completion
+ * @retval    #ML_ERROR_INVALID_PARAMETER when no train buffer is set
+ * @throws    std::runtime_error when a buffer run counts no iteration
+ */
+int NeuralNetwork::train_run(
+  std::function<bool(void *userdata)> stop_cb, void *stop_user_data,
+  std::function<void(void *userdata)> epoch_complete_cb,
+  void *epoch_user_data) {
+  int status = ML_ERROR_NONE;
+
+  /// unless training is continued, restart the epoch/iteration counters and
+  /// call clearOptVar() on every node of the graph
+  if (!std::get<props::ContinueTrain>(model_flex_props)) {
+    epoch_idx = 0;
+    iter = 0;
+    /// note: the loop variable below shadows the `iter` counter reset above
+    for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
+      (*iter)->clearOptVar();
+    }
+  }
+
+  auto batch_size = std::get<props::TrainingBatchSize>(model_flex_props);
+
+  auto const &outputs = model_graph.getOutputTensors();
+  auto in_dims = model_graph.getInputDimension();
+  auto label_dims = model_graph.getOutputDimension();
+
+  auto &[train_buffer, valid_buffer, test_buffer] = data_buffers;
+
+  if (train_buffer == nullptr) {
+    ml_loge("[NeuralNetworks] there is no train dataset!");
+    return ML_ERROR_INVALID_PARAMETER;
+  }
+
+  /**
+   * @brief run a single epoch with given callback, @a auto is used instead of
+   * std::function for performance measure
+   * @param buffer buffer to run
+   * @param shuffle whether to shuffle or not
+   * @param on_iteration_fetch function that will receive reference to stat,
+   * buffer which will be called every time data is fetched and set
+   * @param on_iteration_update_stat function that will receive reference to
+   * stat, the graph outputs and the labels of the iteration, called right
+   * after on_iteration_fetch
+   * @param on_epoch_end function that will receive reference to stat,
+   * buffer which will be called on the epoch end
+   * @param stat statistics object that is reset, filled in and returned by
+   * value
+   */
+  auto run_epoch = [this, &in_dims, &label_dims, &outputs, batch_size](
+                     DataBuffer *buffer, bool shuffle,
+                     auto &&on_iteration_fetch, auto &&on_iteration_update_stat,
+                     auto &&on_epoch_end, RunStats &stat) {
+    /// @todo managing metrics must be handled here as well!! for now it is
+    /// handled in individual callbacks
+    // RunStats stat;
+
+    stat.accuracy = 0.0;
+    stat.loss = 0.0;
+    stat.num_iterations = 0;
+    stat.num_correct_predictions = 0;
+    stat.max_epoch = getEpochs();
+    stat.epoch_idx = epoch_idx;
+
+    std::future<std::shared_ptr<IterationQueue>> future_iq =
+      buffer->startFetchWorker(in_dims, label_dims, shuffle);
+    /// keep fetching until the buffer hands back an empty view
+    while (true) {
+      ScopedView<Iteration> iter_view = buffer->fetch();
+      if (iter_view.isEmpty()) {
+        break;
+      }
+      auto &iteration = iter_view.get();
+      /// iterations whose batch differs from the training batch size are
+      /// skipped entirely (not counted in stat)
+      if (iteration.batch() != batch_size) {
+        /// @todo support partial batch
+        continue;
+      }
+
+      auto const &labels = iteration.getLabelsRef();
+      auto const &inputs = iteration.getInputsRef();
+      model_graph.setInputsLabels(inputs, labels);
+
+      on_iteration_fetch(stat, *buffer);
+      on_iteration_update_stat(stat, outputs, labels);
+    }
+    /// wait for the fetch worker's result; std::future::get() also rethrows
+    /// an exception stored by the worker
+    future_iq.get();
+    on_epoch_end(stat, *buffer);
+
+    /// checked after on_epoch_end, so the callback still runs for an empty
+    /// epoch before this throws
+    if (stat.num_iterations == 0) {
+      throw std::runtime_error("No data came while buffer ran");
+    }
+
+    return stat;
+  };
+
+  /// per-iteration training step: forward, backward (advancing the member
+  /// iteration counter), flush the graph cache, then report progress
+  auto train_for_iteration =
+    [this, stop_cb, stop_user_data](RunStats &stat, DataBuffer &buffer) {
+      forwarding(true, stop_cb, stop_user_data);
+      backwarding(iter++, stop_cb, stop_user_data);
+
+      // To avoid unconsidered memory leak, we need to clear the cache
+      model_graph.flushCache();
+
+      if (!stop_cb(stop_user_data)) {
+        std::cout << "#" << epoch_idx << "/" << getEpochs();
+        ml_logi("# %d / %d", epoch_idx, getEpochs());
+        auto loss = getLoss();
+        buffer.displayProgress(stat.num_iterations, loss);
+      }
+    };
+
+  /// accumulates the current loss and counts the iteration; outputs and
+  /// labels are unused here
+  auto update_train_stat = [this](RunStats &stat,
+                                  const std::vector<Tensor> &outputs,
+                                  const std::vector<Tensor> &labels) {
+    stat.loss += getLoss();
+    stat.num_iterations++;
+  };
+
+  /// turns the accumulated loss into a mean, saves the model to SavePath when
+  /// it is set and training was not stopped, and logs the epoch result
+  auto train_epoch_end = [this, stop_cb, stop_user_data](RunStats &stat,
+                                                         DataBuffer &buffer) {
+    if (stat.num_iterations != 0) {
+      stat.loss /= static_cast<float>(stat.num_iterations);
+    } else {
+      std::cerr << "stat.num_iterations is 0" << std::endl;
+      return;
+    }
+    auto &save_path = std::get<props::SavePath>(model_flex_props);
+    if (!stop_cb(stop_user_data)) {
+      if (!save_path.empty()) {
+        save(save_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
+      }
+
+      std::cout << "#" << epoch_idx << "/" << getEpochs()
+                << " - Training Loss: " << stat.loss;
+      ml_logi("# %d / %d - Training Loss: %f", epoch_idx, getEpochs(),
+              stat.loss);
+      ml_logd("[NNTrainer] Training epoch %d / %d finished successfully.",
+              epoch_idx, getEpochs());
+    } else {
+      ml_logd("[NNTrainer] Training stopped by stop callback function during "
+              "epoch %d.",
+              epoch_idx);
+    }
+  };
+
+  /// evaluation step: forward only (training flag false), no backwarding
+  auto eval_for_iteration = [this, batch_size, stop_cb, stop_user_data](
+                              RunStats &stat, DataBuffer &buffer) {
+    forwarding(false, stop_cb, stop_user_data);
+  };
+
+  /// counts samples whose argmax of the first output equals the argmax of the
+  /// first label, then reuses update_train_stat for loss/iteration counting
+  auto update_eval_stat = [batch_size, &update_train_stat](
+                            RunStats &stat, const std::vector<Tensor> &outputs,
+                            const std::vector<Tensor> &labels) {
+    auto model_out = outputs[0].argmax();
+    auto label_out = labels[0].argmax();
+
+    for (unsigned int b = 0; b < batch_size; b++) {
+      if (model_out[b] == label_out[b])
+        stat.num_correct_predictions++;
+    }
+
+    update_train_stat(stat, outputs, labels);
+  };
+
+  /// max_acc / min_loss live inside the (mutable) closure, so they persist
+  /// across every call made through this one lambda object
+  auto eval_epoch_end = [this, batch_size, max_acc = 0.0f,
+                         min_loss = std::numeric_limits<float>::max()](
+                          RunStats &stat, DataBuffer &buffer) mutable {
+    if (stat.num_iterations != 0) {
+      stat.loss /= static_cast<float>(stat.num_iterations);
+    } else {
+      std::cerr << "stat.num_iterations is 0" << std::endl;
+      return;
+    }
+    /// accuracy in percent over all evaluated samples
+    stat.accuracy = stat.num_correct_predictions /
+                    static_cast<float>(stat.num_iterations * batch_size) *
+                    100.0f;
+
+    /// a new best is a higher accuracy, or equal accuracy with a lower loss
+    if (stat.accuracy > max_acc ||
+        (stat.accuracy == max_acc && stat.loss < min_loss)) {
+      max_acc = stat.accuracy;
+      /// @note this is not actually 'the' min loss for whole time but records
+      /// when data change
+      min_loss = stat.loss;
+      auto &save_best_path = std::get<props::SaveBestPath>(model_flex_props);
+      if (!save_best_path.empty()) {
+        save(save_best_path);
+      }
+    }
+    std::cout << " >> [ Accuracy: " << stat.accuracy
+              << "% - Validation Loss : " << stat.loss << " ]";
+    ml_logi("[ Accuracy: %.2f %% - Validataion Loss: %.5f", stat.accuracy,
+            stat.loss);
+  };
+
+  PROFILE_MEM_ANNOTATE("TRAIN START");
+  auto epochs = getEpochs();
+  ml_logd("[NNTrainer] Starts training. Current epoch: %d. Total epochs: %d.",
+          epoch_idx + 1, getEpochs());
+  /// epochs are 1-based and resume from the stored epoch_idx
+  for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
+    if (stop_cb(stop_user_data)) {
+      /// undo the increment: this epoch was not run
+      --epoch_idx;
+      break;
+    }
+    training = run_epoch(train_buffer.get(), true, train_for_iteration,
+                         update_train_stat, train_epoch_end, training);
+    if (valid_buffer) {
+      validation = run_epoch(valid_buffer.get(), false, eval_for_iteration,
+                             update_eval_stat, eval_epoch_end, validation);
+    }
+    std::cout << '\n';
+    epoch_complete_cb(epoch_user_data);
+  }
+  PROFILE_MEM_ANNOTATE("TRAIN END");
+
+  /// the test buffer, when present, is evaluated once after the epoch loop
+  if (test_buffer) {
+    std::cout << "Evaluation with test data...\n";
+    testing = run_epoch(test_buffer.get(), false, eval_for_iteration,
+                        update_eval_stat, eval_epoch_end, testing);
+  }
+
+  /** Clear the set inputs and labels */
+  model_graph.setInputsLabels({}, {});
+
+  return status;
+}
+
+void swap(NeuralNetwork &lhs, NeuralNetwork &rhs) {
+  {
+    using std::swap;
+
+    swap(lhs.model_props, rhs.model_props);
+    swap(lhs.model_flex_props, rhs.model_flex_props);
+    swap(lhs.load_path, rhs.load_path);
+    swap(lhs.epoch_idx, rhs.epoch_idx);
+    swap(lhs.iter, rhs.iter);
+    swap(lhs.loss, rhs.loss);
+    swap(lhs.opt, rhs.opt);
+    swap(lhs.data_buffers, rhs.data_buffers);
+    swap(lhs.initialized, rhs.initialized);
+    swap(lhs.model_graph, rhs.model_graph);
+    swap(lhs.graph_representation, rhs.graph_representation);
+    swap(lhs.compiled, rhs.compiled);
+    swap(lhs.loadedFromConfig, rhs.loadedFromConfig);
+  }
+}
+
+/**
+ * @brief     Append a layer node to the model
+ * @param[in] layer node added to both the model graph and the graph
+ * representation
+ * @retval    #ML_ERROR_NONE Successful.
+ * @retval    #ML_ERROR_NOT_SUPPORTED the network is already initialized
+ */
+int NeuralNetwork::addLayer(NodeType layer) {
+  if (!initialized) {
+    /** Insert the layer to the graph */
+    model_graph.addLayer(layer);
+    graph_representation.push_back(layer);
+
+    return ML_ERROR_NONE;
+  }
+
+  return ML_ERROR_NOT_SUPPORTED;
+}
+
+/**
+ * @brief     Copy the configuration of another network into this one
+ * @param[in] from network whose properties, loss, optimizer and layer
+ * configurations are copied
+ * @retval    reference to this network
+ * @note      Self-assignment is a no-op. The status returned by addLayer()
+ * is not inspected.
+ */
+NeuralNetwork &NeuralNetwork::copyConfiguration(NeuralNetwork &from) {
+  if (this == &from) {
+    return *this;
+  }
+
+  model_props = from.model_props;
+  model_flex_props = from.model_flex_props;
+  loss = from.loss;
+  opt = from.opt;
+
+  /// every layer node of the source graph is re-added as a cloned
+  /// configuration
+  NetworkGraph source_graph = from.getNetworkGraph();
+  for (auto &source_node : source_graph.getLayerNodes()) {
+    addLayer(static_cast<std::shared_ptr<ml::train::Layer>>(
+      source_node->cloneConfiguration()));
+  }
+
+  return *this;
+}
+
+/**
+ * @brief     Get the unsorted layers between two named layers
+ * @param[in] input_layer name forwarded to the model graph as input layer
+ * @param[in] output_layer name forwarded to the model graph as output layer
+ * @retval    layers returned by the model graph for that query
+ */
+NeuralNetwork::GraphType
+NeuralNetwork::getUnsortedLayers(const std::string &input_layer,
+                                 const std::string &output_layer) {
+  GraphType layers = model_graph.getUnsortedLayers(input_layer, output_layer);
+  return layers;
+}
+
+/**
+ * @brief     Set the optimizer used by the network
+ * @param[in] optimizer optimizer, stored after a cast to OptimizerWrapped
+ * @retval    #ML_ERROR_NONE Successful.
+ * @retval    #ML_ERROR_NOT_SUPPORTED the network is already initialized
+ */
+int NeuralNetwork::setOptimizer(
+  std::shared_ptr<ml::train::Optimizer> optimizer) {
+  if (!initialized) {
+    opt = std::static_pointer_cast<OptimizerWrapped>(optimizer);
+    return ML_ERROR_NONE;
+  }
+
+  return ML_ERROR_NOT_SUPPORTED;
+}
+
+/**
+ * @brief     Store a data buffer in the slot of the given dataset mode
+ * @param[in] mode dataset mode, used (as int) to index data_buffers
+ * @param[in] data_buffer buffer to store, must not be null
+ * @retval    #ML_ERROR_NONE Successful.
+ * @retval    #ML_ERROR_INVALID_PARAMETER data_buffer is null
+ */
+int NeuralNetwork::setDataBuffer(const DatasetModeType &mode,
+                                 std::shared_ptr<DataBuffer> data_buffer) {
+  if (data_buffer != nullptr) {
+    const int slot = static_cast<int>(mode);
+    data_buffers[slot] = data_buffer;
+
+    return ML_ERROR_NONE;
+  }
+
+  return ML_ERROR_INVALID_PARAMETER;
+}
+
+/**
+ * @brief     Look up a layer node of the model graph by name
+ * @param[in] name name of the layer, must not be null
+ * @param[out] layer receives the node cast to ml::train::Layer, must not be
+ * null
+ * @retval    #ML_ERROR_NONE Successful.
+ * @retval    #ML_ERROR_INVALID_PARAMETER name or layer is a null pointer
+ */
+int NeuralNetwork::getLayer(const char *name,
+                            std::shared_ptr<ml::train::Layer> *layer) {
+  /// building a std::string from a null pointer and writing through a null
+  /// output pointer are both undefined, so reject them up front
+  if (name == nullptr || layer == nullptr) {
+    ml_loge("Cannot get layer: name and layer must not be null.");
+    return ML_ERROR_INVALID_PARAMETER;
+  }
+
+  // We provide the layer change through the api with user's responsibility.
+  //
+  // if (compiled) {
+  //   ml_loge("Cannot get compiled layer.");
+  //   return ML_ERROR_NOT_SUPPORTED;
+  // }
+
+  *layer = std::static_pointer_cast<ml::train::Layer>(
+    model_graph.getLayerNode(std::string(name)));
+  return ML_ERROR_NONE;
+}
+
+/**
+ * @brief     Print one metric selected by the summary flag
+ * @param[in] out stream to print to
+ * @param[in] flags summary selector; training loss, validation loss and
+ * validation accuracy are handled, any other value prints nothing
+ */
+void NeuralNetwork::printMetrics(std::ostream &out, unsigned int flags) {
+  if (flags == ML_TRAIN_SUMMARY_MODEL_TRAIN_LOSS) {
+    out << training.loss << std::endl;
+  } else if (flags == ML_TRAIN_SUMMARY_MODEL_VALID_LOSS) {
+    out << validation.loss << std::endl;
+  } else if (flags == ML_TRAIN_SUMMARY_MODEL_VALID_ACCURACY) {
+    out << validation.accuracy << std::endl;
+  }
+}
+
+void NeuralNetwork::printPreset(std::ostream &out, unsigned int preset) {
+  /** print neuralnet metrics */
+  printMetrics(out, preset);
+  if (preset > ML_TRAIN_SUMMARY_TENSOR)
+    return;
+
+  LayerNode::PrintPreset layer_preset = LayerNode::PrintPreset::PRINT_NONE;
+
+  ///@todo match flags with preset
+  unsigned int flags = PRINT_INST_INFO | PRINT_GRAPH_INFO | PRINT_PROP |
+                       PRINT_OPTIMIZER | PRINT_METRIC;
+
+  switch (preset) {
+  case ML_TRAIN_SUMMARY_TENSOR:
+    layer_preset = LayerNode::PrintPreset::PRINT_ALL;
+    break;
+  case ML_TRAIN_SUMMARY_LAYER:
+    layer_preset = initialized ? LayerNode::PrintPreset::PRINT_SUMMARY
+                               : LayerNode::PrintPreset::PRINT_SUMMARY_META;
+    break;
+  case ML_TRAIN_SUMMARY_MODEL:
+    break;
+  default:
+    throw std::invalid_argument("given verbosity is invalid");
+  }
+
+  print(out, flags, layer_preset);
+}
+
+void NeuralNetwork::addWithReferenceLayers(
+  const std::vector<std::shared_ptr<ml::train::Layer>> &reference,
+  const std::string &scope, const std::vector<std::string> &input_layers,
+  const std::vector<std::string> &start_layers,
+  const std::vector<std::string> &end_layers,
+  ml::train::ReferenceLayersType type,
+  const std::vector<std::string> &type_properties) {
+  std::vector<NodeType> casted_reference;
+  casted_reference.reserve(reference.size());
+  for (auto &node : reference) {
+    casted_reference.emplace_back(std::static_pointer_cast<LayerNode>(node));
+  }
+
+  addWithReferenceLayers(casted_reference, scope, input_layers, start_layers,
+                         end_layers, type, type_properties);
+}
+void NeuralNetwork::addWithReferenceLayers(
+  const std::vector<std::shared_ptr<LayerNode>> &reference,
+  const std::string &scope, const std::vector<std::string> &input_layers,
+  const std::vector<std::string> &start_layers,
+  const std::vector<std::string> &end_layers,
+  ml::train::ReferenceLayersType type,
+  const std::vector<std::string> &type_properties) {
+  /// @todo below configuration should be extracted as a free function to make
+  /// it more testable, and reused inside graph interpreter
+
+  /// @note we can exploit connection to connection more fine grained, for now
+  /// it is not supported but we can easily make this supported
+  std::vector<std::shared_ptr<LayerNode>> nodes;
+  nodes.reserve(reference.size());
+  for (auto &node : reference) {
+    nodes.push_back(node->cloneConfiguration());
+  }
+
+  auto start_conns =
+    std::vector<Connection>(start_layers.begin(), start_layers.end());
+  auto input_conns =
+    std::vector<Connection>(input_layers.begin(), input_layers.end());
+  auto end_conns =
+    std::vector<Connection>(end_layers.begin(), end_layers.end());
+
+  std::vector<std::unique_ptr<GraphRealizer>> realizers;
+
+  realizers.emplace_back(new PreviousInputRealizer(start_conns));
+  realizers.emplace_back(new SliceRealizer(start_conns, end_conns));
+
+  if (!input_conns.empty()) {
+    realizers.emplace_back(new InputRealizer(start_conns, input_conns));
+  }
+
+  if (type == ml::train::ReferenceLayersType::RECURRENT) {
+    realizers.emplace_back(
+      new RecurrentRealizer(type_properties, input_conns, end_conns));
+  }
+
+  if (!scope.empty()) {
+    realizers.emplace_back(
+      new RemapRealizer([&scope, &input_conns](std::string &name) {
+        for (auto &i : input_conns) {
+          if (i.getName() == name) {
+            return;
+          }
+        }
+        name = scope + "/" + name;
+      }));
+  }
+
+  for (auto &realizer : realizers) {
+    nodes = realizer->realize(nodes);
+  }
+
+  for (auto &node : nodes) {
+    addLayer(node);
+  }
+}
+
+/**
+ * @brief     Export the network properties through an exporter
+ * @param[in] exporter exporter receiving the results
+ * @param[in] method export method passed along with each result
+ * @note      model_props is saved first, then model_flex_props.
+ */
+void NeuralNetwork::exportTo(Exporter &exporter,
+                             const ml::train::ExportMethods &method) const {
+  exporter.saveResult(model_props, method, this);
+  exporter.saveResult(model_flex_props, method, this);
+}
+
+/**
+ * @brief     Print the network to a stream
+ * @param[in] out stream to print to
+ * @param[in] flags bitmask of PRINT_* values; only PRINT_GRAPH_INFO produces
+ * output in this function, the other branches are placeholders
+ * @param[in] layerPrintPreset preset handed to printPreset() of every layer
+ * node
+ * @note      Prints "model is empty!" and returns before the per-layer output
+ * when the graph is empty.
+ */
+void NeuralNetwork::print(std::ostream &out, unsigned int flags,
+                          LayerNode::PrintPreset layerPrintPreset) {
+  if (flags & PRINT_INST_INFO) {
+    /// @todo uncomment this after implement getProperty (#1875)
+    // out << "===================";
+    // printInstance(out, this);
+  }
+
+  if (flags & PRINT_GRAPH_INFO) {
+    /// table layout: four columns of width 20, 80 characters in total
+    unsigned int total_col_size = 80;
+    std::vector<unsigned int> column_size = {20, 20, 20, 20};
+    /// prints one table row; layer_info must hold one entry per column
+    auto print_graph_layer_info =
+      [column_size](std::ostream &out, std::vector<std::string> layer_info) {
+        /// strings that do not fit are cut to column_width - 1 characters
+        auto trim_string = [](std::string str, unsigned int column_width) {
+          return str.size() < column_width ? str
+                                           : str.substr(0, column_width - 1);
+        };
+
+        for (unsigned int i = 0; i < column_size.size(); ++i) {
+          out << std::setw(column_size[i])
+              << trim_string(layer_info[i], column_size[i]);
+        }
+        out << "\n";
+      };
+
+    out << std::string(total_col_size, '=') << '\n';
+    print_graph_layer_info(
+      out, {"Layer name", "Layer type", "Input dimension", "Input layer"});
+    out << std::string(total_col_size, '=') << '\n';
+    if (compiled) {
+      /// compiled graph: input dimensions and input connections are printed
+      props::GenericShape dim_property;
+
+      for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
+           iter++) {
+        std::string first_dim;
+        if (iter->getInputDimensions().empty()) {
+          first_dim = "";
+        } else {
+          dim_property.set(iter->getInputDimensions()[0]);
+          first_dim = to_string(dim_property);
+        }
+        const std::vector<std::string> &input_layer_names =
+          iter->getInputConnections();
+        std::string first_input_name =
+          input_layer_names.empty() ? "" : input_layer_names[0];
+        print_graph_layer_info(
+          out, {iter->getName(), iter->getType(), first_dim, first_input_name});
+        /// further inputs get their own rows with empty name/type columns
+        for (unsigned int i = 1; i < input_layer_names.size(); ++i) {
+          dim_property.set(iter->getInputDimensions()[i]);
+          print_graph_layer_info(
+            out, {"", "", to_string(dim_property), input_layer_names[i]});
+        }
+        /// rows are separated by '-', the last row is closed with '='
+        out << std::string(total_col_size,
+                           iter == model_graph.cend() - 1 ? '=' : '-')
+            << '\n';
+      }
+    } else {
+      /// not compiled: only layer name and type are printed
+      auto &input_connection =
+        std::get<std::vector<props::InputConnection>>(model_props);
+      auto model_input = std::vector<Connection>(input_connection.begin(),
+                                                 input_connection.end());
+      /// NOTE(review): this predicate is only referenced by the commented-out
+      /// code below and is otherwise unused
+      auto is_actually_an_input_node =
+        [model_input](graph_const_iterator<LayerNode> node) {
+          return node->hasInputShapeProperty() or
+                 std::any_of(model_input.begin(), model_input.end(),
+                             [node](auto &conn) {
+                               return node->getName() == conn.getName();
+                             });
+        };
+
+      for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
+           iter++) {
+        const std::vector<std::string> &input_layer_names =
+          iter->getInputConnections();
+
+        /// @brief connection information.
+        // Intended comment.
+        // std::string first_input_name =
+        //   input_layer_names.empty()
+        //     ? (is_actually_an_input_node(iter) || iter ==
+        //     model_graph.cbegin()
+        //          ? ""
+        //          : (iter - 1)->getName())
+        //     : input_layer_names[0];
+        print_graph_layer_info(out, {iter->getName(), iter->getType(), "", ""});
+        /// one blank row per additional input connection
+        for (unsigned int i = 1; i < input_layer_names.size(); ++i) {
+          print_graph_layer_info(out, {"", "", "", ""});
+        }
+        out << std::string(total_col_size,
+                           iter == model_graph.cend() - 1 ? '=' : '-')
+            << '\n';
+      }
+    }
+  }
+
+  if (flags & PRINT_PROP) {
+    /// @todo print neuralnet property
+    /// @todo print mode (if it is eval or training)
+  }
+
+  if (flags & PRINT_OPTIMIZER) {
+    /// @todo print optimizer (with print optimizer prop)
+  }
+
+  if (flags & PRINT_METRIC) {
+    /// @todo print metric (currently it is done at printPreset as a
+    /// workaround)
+    /// @todo print loss function when it is not initialized. (if it is
+    /// initialized, loss layer will be printed)
+  }
+
+  if (model_graph.empty()) {
+    out << "model is empty!" << std::endl;
+    return;
+  }
+
+  /** print layer properties */
+  for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++)
+    (*iter)->printPreset(out, layerPrintPreset);
+
+  /// @todo Add status to check neuralnet has been run. #290
+}
+
+/**
+ * @brief     Call a function for every node of the model graph
+ * @param[in] fn callback receiving the node (as ml::train::Layer), its run
+ * context and @a user_data
+ * @param[in] user_data opaque pointer handed to fn unchanged
+ */
+void NeuralNetwork::forEachLayer(
+  std::function<void(ml::train::Layer &, RunLayerContext &, void *)> fn,
+  void *user_data) {
+  for (auto it = model_graph.cbegin(); it != model_graph.cend(); it++) {
+    auto layer_node = std::static_pointer_cast<LayerNode>(*it);
+    RunLayerContext &run_context = layer_node->getRunContext();
+    fn(*layer_node, run_context, user_data);
+  }
+}
+
+/**
+ * @brief     Export the model with the given method
+ * @param[in] method export method; METHOD_TFLITE and METHOD_FLATBUFFER are
+ * handled
+ * @param[in] file_path destination path, used by the tflite branch only
+ * @throws    std::runtime_error for METHOD_TFLITE when the build lacks
+ * ENABLE_TFLITE_INTERPRETER, and for any other unhandled method
+ */
+void NeuralNetwork::exports(const ml::train::ExportMethods &method,
+                            const std::string file_path) {
+  switch (method) {
+  case ml::train::ExportMethods::METHOD_TFLITE: {
+#ifdef ENABLE_TFLITE_INTERPRETER
+    nntrainer::TfliteInterpreter interpreter;
+
+    /// We will call "serialize" method for the model which is already trained
+    /// or allocated. So, we need to call deallocateTensors first to make sure
+    /// `dealloc_weights == false`
+    model_graph.deallocateTensors();
+    model_graph.allocateTensors(ExecutionMode::INFERENCE);
+    interpreter.serialize(graph_representation, file_path);
+    model_graph.deallocateTensors();
+#else
+    throw std::runtime_error{
+      "Export methods METHOD_TFLITE is not supported. Please enable tflite "
+      "interpreter by set ENABLE_TFLITE_INTERPRETER=1"};
+#endif
+    break;
+  }
+  case ml::train::ExportMethods::METHOD_FLATBUFFER: {
+
+    /// NOTE(review): this branch only re-allocates the tensors in TRAIN mode;
+    /// nothing is serialized and file_path is unused, and the tensors stay
+    /// allocated afterwards - looks unfinished, confirm.
+    model_graph.deallocateTensors();
+    model_graph.allocateTensors(ExecutionMode::TRAIN);
+    break;
+  }
+  default:
+    throw std::runtime_error{"Unsupported export method"};
+  }
+}
+} /* namespace nntrainer */
diff --git a/nntrainer/models/#neuralnet.h# b/nntrainer/models/#neuralnet.h#
new file mode 100644 (file)
index 0000000..ca209de
--- /dev/null
@@ -0,0 +1,681 @@
+/**
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ * @file       neuralnet.h
+ * @date       04 December 2019
+ * @brief      This is Neural Network Class
+ * @see                https://github.com/nnstreamer/nntrainer
+ * @author     Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug                No known bugs except for NYI items
+ *
+ */
+#ifndef __NEURALNET_H__
+#define __NEURALNET_H__
+#ifdef __cplusplus
+
+#include <array>
+#include <map>
+#include <memory>
+#include <tuple>
+#include <vector>
+#ifdef PROFILE
+#include <chrono>
+#endif
+
+#include <app_context.h>
+#include <common_properties.h>
+#include <compiler_fwd.h>
+#include <dynamic_training_optimization.h>
+#include <execution_mode.h>
+#include <layer_node.h>
+#include <model_common_properties.h>
+#include <network_graph.h>
+#include <optimizer_wrapped.h>
+#include <tensor.h>
+
+#include <model.h>
+#include <nntrainer-api-common.h>
+#include <nntrainer_error.h>
+#include <node_exporter.h>
+
+namespace ml::train {
+class DataSet;
+enum class DatasetType;
+enum class DatasetModeType;
+} // namespace ml::train
+
+namespace nntrainer {
+
+class Exporter;
+
+/**
+ * @brief     Enumeration of Network Type
+ */
+using NetType = ml::train::ModelType;
+
+class DataBuffer;
+using DatasetType = ml::train::DatasetType;
+using DatasetModeType = ml::train::DatasetModeType;
+using RunStats = ml::train::RunStats;
+
+/**
+ * @class   NeuralNetwork Class
+ * @brief   NeuralNetwork Class which has Network Configuration & Layers
+ */
+class NeuralNetwork : public ml::train::Model {
+  friend class ModelLoader; /** access private members of ModelLoader */
+
+public:
+  using NodeType = std::shared_ptr<LayerNode>; /** Type of a Node */
+  using GraphType = std::vector<NodeType>;     /** actual graph type */
+  using FlatGraphType =
+    std::vector<NodeType>; /** topological sorted, iterable 1-D list of nodes */
+  using NetworkGraphType = nntrainer::NetworkGraph;
+
+
+  /**
+   * @brief     Constructor of NeuralNetwork Class
+   */
+  NeuralNetwork();
+
+  /**
+   * @brief     Constructor of NeuralNetwork Class
+   */
+  NeuralNetwork(AppContext app_context_);
+
+  /**
+   * @brief     Destructor of NeuralNetwork Class
+   */
+  ~NeuralNetwork();
+
+  /**
+   * @brief     Get Loss from the previous ran batch of data
+   * @retval    loss value
+   */
+  float getLoss() override;
+
+  /**
+   * @brief returns compilation state of a network
+   * @retval compiled value
+   */
+  bool getCompiled() const override { return compiled; }
+
+  /**
+   * @brief returns initialization state of a network
+   * @retval initialized value
+   */
+  bool getInitialized() const override { return initialized; }
+
+  /**
+   * @brief returns loadedFromConfig state of a network
+   * @retval loadedFromConfig value
+   */
+  bool getLoadedFromConfig() const override { return loadedFromConfig; }
+
+  /**
+   * @brief     Get Loss from the previous epoch of training data
+   * @retval    loss value
+   */
+  float getTrainingLoss() override { return training.loss; }
+
+  /**
+   * @brief     Get Loss from the previous epoch of validation data
+   * @retval    loss value
+   */
+  float getValidationLoss() override { return validation.loss; }
+
+  RunStats getTrainingStats() override { return training; }
+
+  RunStats getValidStats() override { return validation; }
+
+  RunStats getTestStats() override { return testing; }
+
+  /**
+   * @brief     Get Learning rate
+   * @retval    Learning rate
+   *
+   * @todo update to return the last used learning rate
+   */
+  float getLearningRate() { return opt->getLearningRate(0); };
+
+  /**
+   * @brief     Create and load the Network with ini configuration file.
+   * @param[in] config config file path
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int loadFromConfig(const std::string &config) override;
+
+  /**
+   * @brief     Compile the graph in the model
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int compile() override;
+
+  /**
+   * @brief     set Property of Network
+   * @param[in] values values of property
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  void setProperty(const std::vector<std::string> &values) override;
+
+  /**
+   * @brief     Initialize Network. This should be called after set all
+   * hyperparameters.
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int initialize() override;
+
+  /**
+   * @brief     Allocate memory for the model. This should be called after
+   * initialize.
+   * @param[in] exec_mode allocate memory based on the given execution mode
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int allocate(ExecutionMode mode = ExecutionMode::TRAIN);
+
+  /**
+   * @brief     Deallocate memory for the model.
+   * @param[in] trainable Assign memory for inference or train mode
+   * @retval #ML_ERROR_NONE Successful.
+   * @note This does not free the model graph but only the weight tensors, and
+   * input/output/gradient/derivative tensors if any.
+   */
+  int deallocate();
+
+  /**
+   * @brief     Update graph to make batch normalization in-place
+   * @note      This assumes that the batch normalization implementation does
+   * not need input/output of itself while backwarding. The reason is that the
+   * batch normalization layer caches a processed form of its own input than the
+   * input tensor itself.
+   * @note      This optimization might break the working when some other
+   * implementation of batch normalization layer is used or delegated to some
+   * other backend. Ensure to verify this optimization with other
+   * implementations once added.
+   */
+  void inPlaceOptimization(const std::string &layer_type);
+
+  /**
+   * @brief     Forward Propagation of the neural network
+   */
+  sharedConstTensors forwarding(bool training = true,
+                                std::function<bool(void *userdata)> stop_cb =
+                                  [](void *user_data) { return false; },
+                                void *user_data = nullptr);
+
+  /**
+   * @brief     Forward Propagation of the neural network
+   * @param[in] input List of Input Tensors taken by the neural network
+   * @param[in] label List of Label Tensors for the model
+   * @retval    List of Output Tensors
+   */
+  sharedConstTensors forwarding(sharedConstTensors input,
+                                sharedConstTensors label = {},
+                                bool training = true);
+
+  /**
+   * @brief     Backward Propagation of the neural network
+   * @param[in] iteration Iteration Number for the optimizer
+   */
+  void backwarding(int iteration,
+                   std::function<bool(void *userdata)> stop_cb =
+                     [](void *user_data) { return false; },
+                   void *user_data = nullptr);
+
+  /**
+   * @copydoc Model::save(const std::string &file_path, ml::train::ModelFormat
+   * format);
+   */
+  void save(const std::string &file_path,
+            ml::train::ModelFormat format =
+              ml::train::ModelFormat::MODEL_FORMAT_BIN) override;
+
+  /**
+   * @copydoc Model::load(const std::string &file_path, ml::train::ModelFormat
+   * format);
+   */
+  void load(const std::string &file_path,
+            ml::train::ModelFormat format =
+              ml::train::ModelFormat::MODEL_FORMAT_BIN) override;
+
+  /**
+   * @brief     get Epochs
+   * @retval    epochs
+   */
+  unsigned int getEpochs() {
+    return std::get<props::Epochs>(model_flex_props);
+  };
+
+  /**
+   * @brief     get current epoch_idx
+   * @retval    current epoch_idx
+   */
+  unsigned int getCurrentEpoch() override;
+
+  /**
+   * @brief     Copy Neural Network
+   * @param[in] from NeuralNetwork Object to copy
+   * @retval    NeuralNetwork Object copied
+   * @todo Need to implement the copy of graph core
+   */
+  NeuralNetwork &copy(NeuralNetwork &from);
+
+  /**
+   * @brief     Copy Neural Network Configuration
+   * @param[in] from NeuralNetwork Object to copy
+   * @retval    NeuralNetwork Object copied
+   * @note This does not copy the context of neural network model. It only
+   * copies the configuration of the network model. Therefore, it needs the
+   * compile and initialization to run the model. Also if you need the
+   * initialized the weight, load call is required.
+   */
+  NeuralNetwork &copyConfiguration(NeuralNetwork &from);
+
+  /**
+   * @brief     Run NeuralNetwork train
+   * @param[in] values hyper parameters
+   * @param[in] stop_cb callback function to decide stop training or not
+   * ~~~~~
+   * @a stop_user_data user_data to be used in stop_cb
+   * @a bool true if stop the training
+   * ~~~~~
+   * @param[in] epoch_complete_cb Called the end of an epoch.
+   * @a epoch_user_data user_data to be used in epoch_complete_cb
+   * ~~~~~
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int train(const std::vector<std::string> &values = {},
+            std::function<bool(void *)> stop_cb =
+              [](void *stop_user_data) { return false; },
+            void *stop_user_data = nullptr,
+            std::function<void(void *)> epoch_complete_cb =
+              [](void *epoch_user_data) { return false; },
+            void *epoch_user_data = nullptr) override;
+
+  /**
+   * @brief     Run NeuralNetwork inference
+   * @param[in] X input tensor
+   * @param[in] free_mem true to free memory. used only in training mode.
+   * @retval shared_ptr<const Tensor>
+   */
+  sharedConstTensors inference(sharedConstTensors X, bool free_mem = false);
+
+  /**
+   * @brief     Run NeuralNetwork inference
+   * @param[in] X input tensor
+   * @param[in] label label tensor
+   * @param[in] free_mem true to free memory. used only in training mode.
+   * @retval shared_ptr<const Tensor>
+   */
+  sharedConstTensors inference(sharedConstTensors X, sharedConstTensors label,
+                               bool free_mem = false);
+
+  /**
+   * @brief     Run the inference of the model
+   * @param[in] batch batch size of current input
+   * @param[in] input inputs as a list of each input data
+   * @param[in] label labels as a list of each label data
+   * @retval list of output as float *
+   * @note The output memory must not be freed by the caller
+   */
+  std::vector<float *> inference(unsigned int batch,
+                                 const std::vector<float *> &input,
+                                 const std::vector<float *> &label) override;
+
+  /**
+   * @brief     Set the dataset to be used for the given dataset mode
+   * @param[in] dt datatype (mode) where it should be
+   * @param[in] dataset set the dataset
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int setDataset(const DatasetModeType &dt,
+                 std::shared_ptr<ml::train::Dataset> dataset) override;
+
+  /**
+   * @copydoc void forEachLayer(std::function<void(Layer &,
+   * nntrainer::RunLayerContext &), void *user_data> fn);
+   *
+   */
+  void forEachLayer(
+    std::function<void(ml::train::Layer & /**< layer */,
+                       RunLayerContext & /**< rc */, void *user_data)>
+      fn,
+    void *user_data = nullptr) override;
+
+  /**
+   * @brief     Set the data buffer to be used for the given dataset mode
+   * @param[in] dt datatype (mode) where it should be
+   * @param[in] data_buffer set the databuffer
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int setDataBuffer(const DatasetModeType &dt,
+                    std::shared_ptr<DataBuffer> data_buffer);
+
+  /**
+   * @brief     add layer into neural network model
+   * @param[in] layer layer to add; it is statically cast to a LayerNode
+   * pointer and handed to the NodeType overload of addLayer
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int addLayer(std::shared_ptr<ml::train::Layer> layer) override {
+    auto layer_node = std::static_pointer_cast<LayerNode>(layer);
+    return addLayer(layer_node);
+  }
+
+  /**
+   * @brief     add layer into neural network model
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int addLayer(NodeType layer);
+
+  /**
+   * @brief     set optimizer for the neural network model
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int setOptimizer(std::shared_ptr<ml::train::Optimizer> optimizer) override;
+
+  /**
+   * @brief     get layer by name from neural network model
+   * @param[in] name name of the layer to get
+   * @param[out] layer shared_ptr to hold the layer to get
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int getLayer(const char *name,
+               std::shared_ptr<ml::train::Layer> *layer) override;
+
+  /**
+   * @brief this function helps exporting the layer in a predefined format,
+   * while workarounding issue caused by templated function type eraser
+   *
+   * @param     exporter exporter that contains exporting logic
+   * @param     method enum value to identify how it should be exported to
+   */
+  void exportTo(Exporter &exporter,
+                const ml::train::ExportMethods &method) const;
+
+  /**
+   * @brief     get input dimension of neural network
+   * @retval std::vector<TensorDim> input dimension reported by model_graph
+   * @throws std::logic_error if the model has not been compiled yet
+   */
+  std::vector<TensorDim> getInputDimension() override {
+    if (compiled) {
+      return model_graph.getInputDimension();
+    }
+    throw std::logic_error("model should be compiled before get dimension");
+  }
+
+  /**
+   * @brief     get output dimension of neural network
+   * @retval std::vector<TensorDim> output dimension reported by model_graph
+   * @throws std::logic_error if the model has not been compiled yet
+   */
+  std::vector<TensorDim> getOutputDimension() override {
+    if (compiled) {
+      return model_graph.getOutputDimension();
+    }
+    throw std::logic_error("model should be compiled before get dimension");
+  }
+
+  /**
+   * @brief get FlatGraph of current graph
+   * @note flat graph contains pointer to the actual nodes, which is not deeply
+   * copied.
+   * @retval flatGraph of the current graph
+   * @note these layers will be in sorted order if the model is compiled,
+   * otherwise the order is the order of addition of layers in the model.
+   */
+  FlatGraphType getFlatGraph() { return model_graph.getLayerNodes(); }
+
+  /**
+   * @brief get if the model is empty
+   * @retval true if model_graph reports empty, else false
+   */
+  bool empty() const { return model_graph.empty(); }
+
+  /**
+   * @brief get the number of nodes in the model
+   * @retval size reported by model_graph
+   */
+  size_t size() const { return model_graph.size(); }
+
+  /**
+   * @brief     get network graph
+   * @retval NetworkGraphType model_graph, returned by value
+   */
+  NetworkGraphType getNetworkGraph() { return model_graph; }
+
+  /**
+   * @brief get current graph from the model
+   * @note graph contains pointer to the actual nodes, which is not deeply
+   * copied.
+   * @retval current graph
+   */
+  GraphType getUnsortedLayers(const std::string &input_layer = "",
+                              const std::string &output_layer = "");
+
+  /**
+   * @brief     Summarize the model by delegating to printPreset
+   * @param out std::ostream to get the model summary
+   * @param verbosity verbosity of the summary, forwarded to printPreset as an
+   * unsigned int preset
+   */
+  virtual void summarize(std::ostream &out,
+                         ml_train_summary_type_e verbosity) override {
+    const auto preset = static_cast<unsigned int>(verbosity);
+    printPreset(out, preset);
+  }
+
+  /**
+   * @brief Print Option when printing model info. The function delegates to the
+   * `print`
+   * @param out std::ostream to print
+   * @param preset preset from `ml_train_summary_type_e`
+   */
+  virtual void printPreset(std::ostream &out, unsigned int preset);
+
+  /**
+   * @brief Enable dynamic fine-tuning optimization
+   * @param threshold Comparison limit to decide if weight updated or not
+   * @param op operation handed to dynamic_training_opt.setOp(); defaults to
+   * DynamicTrainingOptimization::dft_opt_norm. NOTE(review): the "max" and
+   * "norm" values formerly documented under `mode` look like they belong to
+   * this parameter; confirm against DynamicTrainingOptimization.
+   * @param mode dynamic fine-tuning optimization mode handed to
+   * dynamic_training_opt.setMode(); defaults to
+   * DynamicTrainingOptimization::dft_opt_mode_derivative
+   */
+  void enableDynamicTraining(
+    float threshold, std::string op = DynamicTrainingOptimization::dft_opt_norm,
+    std::string mode = DynamicTrainingOptimization::dft_opt_mode_derivative) {
+    // Configure all three settings before switching the optimization on.
+    dynamic_training_opt.setThreshold(threshold);
+    dynamic_training_opt.setOp(op);
+    dynamic_training_opt.setMode(mode);
+    dynamic_training_opt.enable();
+  }
+
+  /**
+   * @brief Disable dynamic fine-tuning optimization
+   */
+  void disableDynamicFineTuning() { dynamic_training_opt.disable(); }
+
+  /**
+   * @copydoc   void ml::train::Model::addWithReferenceLayers(
+   * const std::vector<std::shared_ptr<Layer>> &reference,
+   * const std::string &scope, const std::vector<std::string> &input_layers,
+   * const std::vector<std::string> &start_layers,
+   * const std::vector<std::string> &end_layers, ReferenceLayersType type,
+   * const std::vector<std::string> &type_properties = {})
+   *
+   */
+  void addWithReferenceLayers(
+    const std::vector<std::shared_ptr<ml::train::Layer>> &reference,
+    const std::string &scope, const std::vector<std::string> &input_layers,
+    const std::vector<std::string> &start_layers,
+    const std::vector<std::string> &end_layers,
+    ml::train::ReferenceLayersType type,
+    const std::vector<std::string> &type_properties = {}) override;
+
+  /**
+   * @copydoc   void ml::train::Model::addWithReferenceLayers(
+   * const std::vector<std::shared_ptr<Layer>> &reference,
+   * const std::string &scope, const std::vector<std::string> &input_layers,
+   * const std::vector<std::string> &start_layers,
+   * const std::vector<std::string> &end_layers, ReferenceLayersType type,
+   * const std::vector<std::string> &type_properties = {})
+   */
+  void addWithReferenceLayers(
+    const std::vector<std::shared_ptr<LayerNode>> &reference,
+    const std::string &scope, const std::vector<std::string> &input_layers,
+    const std::vector<std::string> &start_layers,
+    const std::vector<std::string> &end_layers,
+    ml::train::ReferenceLayersType type,
+    const std::vector<std::string> &type_properties = {});
+
+  /**
+   * @brief     export the model according to given export method
+   * @param method export method
+   * @param file_path path to be serialized
+   */
+  void exports(const ml::train::ExportMethods &method,
+               const std::string file_path) override;
+
+private:
+  using FlexiblePropTypes =
+    std::tuple<props::Epochs, props::TrainingBatchSize, props::SavePath,
+               props::ContinueTrain, props::SaveBestPath,
+               props::MemoryOptimization, props::MemorySwap,
+               props::MemorySwapPath, props::MemorySwapLookahead>;
+  using RigidPropTypes =
+    std::tuple<props::LossType, std::vector<props::InputConnection>,
+               std::vector<props::LabelLayer>, props::ClipGradByGlobalNorm>;
+
+  RigidPropTypes model_props;         /**< model props */
+  FlexiblePropTypes model_flex_props; /**< model train props */
+  std::string load_path; /**< path to load weights when initialize  */
+
+  /**
+   * @brief   Print Options when printing layer info
+   */
+  typedef enum {
+    // clang-format off
+  PRINT_INST_INFO  = (1 << 0), /**< Option to print type & instance address info */
+  PRINT_GRAPH_INFO = (1 << 1), /**< Option to print graph topology info */
+  PRINT_PROP       = (1 << 2), /**< Option to print properties */
+  PRINT_OPTIMIZER  = (1 << 3), /**< Option to print optimizer */
+  PRINT_METRIC       = (1 << 4), /**< Option to print if current network is set to training */
+    // clang-format on
+  } PrintOption;
+
+  unsigned int epoch_idx; /**< Number of epoch_idx  */
+
+  unsigned int iter; /**< iterations trained */
+
+  float loss; /**< loss */
+
+  std::shared_ptr<OptimizerWrapped> opt; /**< Optimizer; this gets copied into
+                    each layer, do not use this directly */
+
+  std::array<std::shared_ptr<DataBuffer>, 3>
+    data_buffers; /**< Data Buffers to get Input */
+
+  bool initialized; /**< Network is initialized */
+
+  bool compiled; /**< Network is compiled */
+
+  bool loadedFromConfig; /**< Check if config is loaded to prevent load twice */
+
+  RunStats validation; /** validation statistics of the model */
+  RunStats training;   /** training statistics of the model */
+  RunStats testing;    /** testing statistics of the model */
+
+  AppContext app_context; /** Configurations bound to current app */
+
+  NetworkGraph model_graph;                 /** Network Model Graph */
+  GraphRepresentation graph_representation; /** Unsorted graph representation */
+
+  DynamicTrainingOptimization dynamic_training_opt; /**< Dynamic fine-tuning
+   optimization mode. supported modes are "max" and "norm" */
+
+  /**
+   * @brief save model in ini
+   *
+   * @param file_path file path
+   */
+  void saveModelIni(const std::string &file_path);
+
+  /**
+   * @brief print function for neuralnet
+   * @param[in] out outstream
+   * @param[in] flags bit combination of Neuralnet::PrintOption
+   * @param[in] layerPrintPreset print preset when to print layer properties
+   */
+  void print(std::ostream &out, unsigned int flags = 0,
+             LayerNode::PrintPreset layerPrintPreset =
+               LayerNode::PrintPreset::PRINT_SUMMARY);
+
+  /**
+   * @brief     Set Loss
+   * @param[in] l loss value
+   */
+  void setLoss(float l);
+
+  /**
+   * @brief     Run NeuralNetwork train
+   * @param[in] stop_cb callback function to decide stop training or not
+   * @param[in] epoch_complete_cb Called the end of an epoch.
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int train_run(std::function<bool(void *)> stop_cb =
+                  [](void *) { return false; },
+                void *user_data = nullptr,
+                std::function<void(void *)> epoch_complete_cb =
+                  [](void *) { return false; },
+                void *data = nullptr);
+
+  /**
+   * @brief     Swap function for the class
+   */
+  friend void swap(NeuralNetwork &lhs, NeuralNetwork &rhs);
+
+  /**
+   * @brief     set Property/Configuration of Network for training after the
+   * network has been initialized
+   * @param[in] values values of property
+   * @note      No status code is returned; the return type is void.
+   */
+  void setTrainConfig(const std::vector<std::string> &values);
+
+  /**
+   * @brief print metrics function for neuralnet
+   * @param[in] out outstream
+   * @param[in] flags verbosity from ml_train_summary_type_e
+   */
+  void printMetrics(std::ostream &out, unsigned int flags = 0);
+
+  /**
+   * @brief     Match the given tensor shape with input shape of the model
+   * @param[in] X input tensor
+   * @retval true if matches, false is error
+   */
+  bool validateInput(sharedConstTensors X);
+};
+
+} /* namespace nntrainer */
+
+#endif /* __cplusplus */
+#endif /* __NEURALNET_H__ */
diff --git a/nntrainer/models/circle_plus/circle_plus.fbs b/nntrainer/models/circle_plus/circle_plus.fbs
new file mode 100644 (file)
index 0000000..347dcdc
--- /dev/null
@@ -0,0 +1,175 @@
+namespace circle_plus;
+
+file_extension "op";
+
+enum TensorType : byte {
+     FLOAT32 = 0,
+     FLOAT16 = 1,
+}
+
+// Initializer kinds referenced by FullyConnectedOptions.
+// NOTE(review): `ZEORS` looks like a misspelling of `ZEROS`. Renaming it would
+// change the generated enumerator name, so confirm no generated-code user
+// depends on the current spelling first.
+enum InitializerType: byte{
+     ZEORS = 0,
+     ONES = 1,
+     LECUN_NORMAL = 2,
+     LECUN_UNIFORM = 3,
+     XAVIER_NORMAL = 4,
+     XAVIER_UNIFORM = 5,
+     HE_NORMAL = 6,
+     HE_UNIFORM = 7,
+     NONE=8,
+}
+
+enum ActivationType: byte{
+     TANH=0,
+     SIGMOID=1,
+     SOFTMAX=2,
+     RELU=3,
+     LEAKY_RELU=4,
+     NONE=5,
+}
+
+//Tensor
+table Tensor{
+      type:TensorType;
+      dim:[int];
+      name:string;
+      buffer:uint;
+}
+
+//Tensor Mapping : name - index
+table TensorMap{
+      name:string;
+      index:uint;
+}
+
+
+//Buffer
+table Buffer{
+      data:[ubyte];
+}
+
+
+//Layers
+enum LayerTypes : int32 {
+     FULLY_CONNECTED = 0,
+}
+
+union LayerOptions {
+      FullyConnectedOptions,
+}
+
+table FullyConnectedOptions {
+     unit:uint;
+     weight_initializer:InitializerType;
+     bias_initializer:InitializerType;
+}
+
+// One layer entry of a NetworkGraph: type, name, type-specific options,
+// input connections, activation and the tensors attached to the layer.
+// NOTE(review): `weignts` looks like a misspelling of `weights`. Renaming it
+// would change the generated accessor name, so confirm before changing it.
+table Layers {
+      type:LayerTypes;
+      name:string;
+      options:LayerOptions;
+      input_layers:[string];
+      input_shape:[int];
+      activation:ActivationType;
+      weignts:[Tensor];
+      input_tensors:[Tensor];
+      output_tensors:[Tensor];
+}
+
+
+//Learning Rate
+enum LRSchedulerType: int32{
+     CONSTANT = 0,
+     EXPONENTIAL = 1,
+     STEP=2,
+}
+
+union LROptions{
+      ConstantLROptions,
+      ExponentialLROptions,
+      StepLROptions,
+}
+
+table ConstantLROptions{
+      learning_rate:float;
+}
+
+table ExponentialLROptions{
+      learning_rate:float;
+}
+
+table StepLROptions{
+      learning_rate:float;
+}
+
+table LRScheduler{
+      type:LRSchedulerType;
+      options:LROptions;
+}
+
+//Optimizer 
+enum OptimizerType: int32{
+     SGD = 0,
+     ADAM =1,
+}
+
+union OptimizerOptions{
+      SGDOptimizerOptions,
+      AdamOptimizerOptions,
+}
+
+table SGDOptimizerOptions{
+      
+}
+
+table AdamOptimizerOptions{
+      beta1:float;
+      beta2:float;
+      epsilon:float;
+}
+
+table Optimizer{
+      type:OptimizerType;
+      options:OptimizerOptions;
+}
+
+enum LossType:int32{
+     MSE =0,
+     CROSS = 1,
+}
+
+union LossOptions{
+      MSELossOptions,
+      CrossLossOptions,
+}
+
+table MSELossOptions{
+}
+
+table CrossLossOptions{
+}
+
+table Loss{
+      type:LossType;
+      options:LossOptions;
+}
+
+table NetworkGraph{
+      name:string;
+      input_tensors:[int];
+      output_tensors:[int];
+      layers:[Layers];
+}
+
+
+table Model {
+      name:string;
+      epochs:uint;
+      batch_size:uint;
+      optimizer:Optimizer;
+      learning_rate_scheduler:LRScheduler;
+      loss:Loss;      
+      network_graph:[NetworkGraph];
+}
+
+root_type Model;
\ No newline at end of file
diff --git a/nntrainer/models/circle_plus/nntrainer.fbs b/nntrainer/models/circle_plus/nntrainer.fbs
new file mode 100644 (file)
index 0000000..e71bc73
--- /dev/null
@@ -0,0 +1,175 @@
+namespace circle_p;
+
+file_extension "circle_p";
+
+enum TensorType : byte {
+     FLOAT32 = 0,
+     FLOAT16 = 1,
+}
+
+// Initializer kinds referenced by FullyConnectedOptions.
+// NOTE(review): `ZEORS` looks like a misspelling of `ZEROS`. Renaming it would
+// change the generated enumerator name, so confirm no generated-code user
+// depends on the current spelling first.
+enum InitializerType: byte{
+     ZEORS = 0,
+     ONES = 1,
+     LECUN_NORMAL = 2,
+     LECUN_UNIFORM = 3,
+     XAVIER_NORMAL = 4,
+     XAVIER_UNIFORM = 5,
+     HE_NORMAL = 6,
+     HE_UNIFORM = 7,
+     NONE=8,
+}
+
+enum ActivationType: byte{
+     TANH=0,
+     SIGMOID=1,
+     SOFTMAX=2,
+     RELU=3,
+     LEAKY_RELU=4,
+     NONE=5,
+}
+
+//Tensor
+table Tensor{
+      type:TensorType;
+      dim:[int];
+      name:string;
+      buffer:uint;
+}
+
+//Tensor Mapping : name - index
+table TensorMap{
+      name:string;
+      index:uint;
+}
+
+
+//Buffer
+table Buffer{
+      data:[ubyte];
+}
+
+
+//Layers
+enum LayerTypes : int32 {
+     FULLY_CONNECTED = 0,
+}
+
+union LayerOptions {
+      FullyConnectedOptions,
+}
+
+table FullyConnectedOptions {
+     unit:uint;
+     weight_initializer:InitializerType;
+     bias_initializer:InitializerType;
+}
+
+// One layer entry of a NetworkGraph: type, name, type-specific options,
+// input connections, activation and the tensors attached to the layer.
+// NOTE(review): `weignts` looks like a misspelling of `weights`. Renaming it
+// would change the generated accessor name, so confirm before changing it.
+table Layers {
+      type:LayerTypes;
+      name:string;
+      options:LayerOptions;
+      input_layers:[string];
+      input_shape:[int];
+      activation:ActivationType;
+      weignts:[Tensor];
+      input_tensors:[Tensor];
+      output_tensors:[Tensor];
+}
+
+
+//Learning Rate
+enum LRSchedulerType: int32{
+     CONSTANT = 0,
+     EXPONENTIAL = 1,
+     STEP=2,
+}
+
+union LROptions{
+      ConstantLROptions,
+      ExponentialLROptions,
+      StepLROptions,
+}
+
+table ConstantLROptions{
+      learning_rate:float;
+}
+
+table ExponentialLROptions{
+      learning_rate:float;
+}
+
+table StepLROptions{
+      learning_rate:float;
+}
+
+table LRScheduler{
+      type:LRSchedulerType;
+      options:LROptions;
+}
+
+//Optimizer 
+enum OptimizerType: int32{
+     SGD = 0,
+     ADAM =1,
+}
+
+union OptimizerOptions{
+      SGDOptimizerOptions,
+      AdamOptimizerOptions,
+}
+
+table SGDOptimizerOptions{
+      
+}
+
+table AdamOptimizerOptions{
+      beta1:float;
+      beta2:float;
+      epsilon:float;
+}
+
+table Optimizer{
+      type:OptimizerType;
+      options:OptimizerOptions;
+}
+
+enum LossType:int32{
+     MSE =0,
+     CROSS = 1,
+}
+
+union LossOptions{
+      MSELossOptions,
+      CrossLossOptions,
+}
+
+table MSELossOptions{
+}
+
+table CrossLossOptions{
+}
+
+table Loss{
+      type:LossType;
+      options:LossOptions;
+}
+
+table NetworkGraph{
+      name:string;
+      input_tensors:[int];
+      output_tensors:[int];
+      layers:[Layers];
+}
+
+
+table Model {
+      name:string;
+      epochs:uint;
+      batch_size:uint;
+      optimizer:Optimizer;
+      learning_rate_scheduler:LRScheduler;
+      loss:Loss;      
+      network_graph:[NetworkGraph];
+}
+
+root_type Model;
\ No newline at end of file
diff --git a/nntrainer/models/circle_plus/test b/nntrainer/models/circle_plus/test
new file mode 100755 (executable)
index 0000000..57cd407
Binary files /dev/null and b/nntrainer/models/circle_plus/test differ
diff --git a/nntrainer/models/circle_plus/test.bin b/nntrainer/models/circle_plus/test.bin
new file mode 100644 (file)
index 0000000..a9dd97d
Binary files /dev/null and b/nntrainer/models/circle_plus/test.bin differ
diff --git a/nntrainer/models/circle_plus/test.cpp b/nntrainer/models/circle_plus/test.cpp
new file mode 100644 (file)
index 0000000..e54f034
--- /dev/null
@@ -0,0 +1,37 @@
+#include <iostream>
+#include "circle_plus_generated.h"
+#include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/util.h"
+#define READ_WRITE 0
+
+int main(){
+  flatbuffers::FlatBufferBuilder builder;
+
+  if(READ_WRITE){
+    auto network_name = builder.CreateString("Model");
+    auto epochs = 1500;
+    auto batch_size = 32;
+
+    builder.Finish(circle_plus::CreateModel(builder, network_name, epochs, batch_size));
+
+    auto data = builder.GetBufferPointer();
+
+    auto model = circle_plus::GetModel(data);
+    std::cout << model->name()->c_str()<<" " <<model->epochs() <<" " <<model->batch_size() <<std::endl;
+    
+
+    flatbuffers::SaveFile("test.bin", reinterpret_cast<char*>(data), builder.GetSize(),true);
+  }else{
+    std::string binaryfile;
+    bool ok = flatbuffers::LoadFile("test.bin", false, &binaryfile);
+    builder.PushBytes(reinterpret_cast<unsigned char*>(const_cast<char*>(binaryfile.c_str())), binaryfile.size());
+
+    auto model = circle_plus::GetModel(builder.GetCurrentBufferPointer());
+    std::cout << model->name()->c_str()<<" " <<model->epochs() <<" " <<model->batch_size() <<std::endl;
+
+  }
+  
+  
+  
+  return 0;
+}
diff --git a/nntrainer/setProperty b/nntrainer/setProperty
new file mode 100644 (file)
index 0000000..5c35312
--- /dev/null
@@ -0,0 +1,1939 @@
+./app_context.cpp:// SPDX-License-Identifier: Apache-2.0
+./app_context.cpp: * ----------------------------------------
+./app_context.cpp:  auto layer = pluggable->createfunc();
+./app_context.cpp:  auto type = layer->getType();
+./app_context.cpp:  pluggable->destroyfunc(layer);
+./app_context.cpp:  auto optimizer = pluggable->createfunc();
+./app_context.cpp:  auto type = optimizer->getType();
+./app_context.cpp:  pluggable->destroyfunc(optimizer);
+./app_context.cpp:    if (endswith(entry->d_name, solib_suffix)) {
+./app_context.cpp:      if (endswith(entry->d_name, layerlib_suffix)) {
+./app_context.cpp:          int key = registerLayer(entry->d_name, base_path);
+./app_context.cpp:      } else if (endswith(entry->d_name, optimizerlib_suffix)) {
+./app_context.cpp:          int key = registerOptimizer(entry->d_name, base_path);
+./app_context.cpp:  std::string assigned_key = key == "" ? factory({})->getType() : key;
+./app_context.cpp:  if (int_key != -1 && int_map.find(int_key) != int_map.end()) {
+./app_context.cpp:  int assigned_int_key = int_key == -1 ? str_map.size() + 1 : int_key;
+./app_context.h:// SPDX-License-Identifier: Apache-2.0
+./app_context.h: * @class AppContext contains user-dependent configuration
+./app_context.h:   * 1) integer -> string index
+./app_context.h:   * 2) string -> factory index
+./app_context.h:   * calling factory({})->getType();
+./app_context.h:   * @param int_key key to access the factory by integer, if it is -1(default),
+./app_context.h:                            const int int_key = -1) {
+./app_context.h:   * calling factory({})->getType();
+./app_context.h:   * @param int_key key to access the factory by integer, if it is -1(default),
+./app_context.h:                            const int int_key = -1);
+./app_context.h:    return createObject<T>(entry->second, props);
+./app_context.h:    return entry->second(props);
+./compiler/tflite_interpreter.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/slice_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/remap_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/flatten_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/flatbuffer_interpreter.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/recurrent_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/recurrent_realizer.h:                        $(unroll_for)-1 */
+./compiler/compiler.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/compiler.h: * ExecutableGraph eg = compiler->compile(g);
+./compiler/compiler.h: *    +-------+--+--------+
+./compiler/compiler.h: *    +-------+-----------+
+./compiler/compiler.h: *      +--------+------+
+./compiler/compiler.h: *      +---------------+
+./compiler/multiout_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/tflite_opnode.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/tflite_opnode.cpp:      tensor->reshape(TensorDim{tensor->batch(), tensor->height(),
+./compiler/tflite_opnode.cpp:                                tensor->width(), tensor->channel()});
+./compiler/tflite_opnode.cpp:    auto previous_input_shape = input_nodes[0]->getInputs()[0];
+./compiler/tflite_opnode.cpp:    const unsigned int UNIT = outputs[0]->height();
+./compiler/tflite_opnode.cpp:    const unsigned int CHANNEL = previous_input_shape->channel();
+./compiler/tflite_opnode.cpp:    const unsigned int HEIGHT = previous_input_shape->height();
+./compiler/tflite_opnode.cpp:    const unsigned int WIDTH = previous_input_shape->width();
+./compiler/tflite_opnode.cpp:    auto weight_data = weights[0]->getData();
+./compiler/tflite_opnode.cpp:    new_weights.push_back(weights[0]->transpose("0:2:1"));
+./compiler/tflite_opnode.cpp:      std::transform(node_owned_variable.end() - result.size(),
+./compiler/tflite_opnode.cpp:      // NNTR_THROW_IF(dynamic_cast<InputLayer>(layer_ptr->getLayer()) ==
+./compiler/tflite_opnode.cpp:      std::transform(node_owned_variable.end() - result.size(),
+./compiler/interpreter.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/interpreter.h: * g = interpreter->serialize(f);
+./compiler/interpreter.h: *         +--------+
+./compiler/interpreter.h: *         +--+--+--+
+./compiler/interpreter.h: *    +-------+--+--------+
+./compiler/interpreter.h: *    +-------+-----------+
+./compiler/flatbuffer_opnode.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/tf_schema.fbs://     http://www.apache.org/licenses/LICENSE-2.0
+./compiler/tf_schema.fbs:  //   f = scale * (q - zero_point)
+./compiler/tf_schema.fbs:// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+./compiler/tf_schema.fbs:// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+./compiler/tf_schema.fbs:// potentially with a k-dimensional block (0 <= k <= n) with dims
+./compiler/tf_schema.fbs:// (dn, ..., dn+k-1), the format needs to specify:
+./compiler/tf_schema.fbs://   1. In what order to traverse these dimensions. For example, to store a 2-D
+./compiler/tf_schema.fbs://      (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+./compiler/tf_schema.fbs://   2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+./compiler/tf_schema.fbs://      tensor dimension in (d0, ..., dn-1).
+./compiler/tf_schema.fbs://      (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+./compiler/tf_schema.fbs://   2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+./compiler/tf_schema.fbs:// Variable-typed buffer to store the index metadata for a sparse dimension.
+./compiler/tf_schema.fbs:// vector. We don't want the per-dimensional index to overflow that range.
+./compiler/tf_schema.fbs:  //   - If format is DimensionType.DENSE then we use the dense_size field to
+./compiler/tf_schema.fbs:  //   - If format is DimensionType.SPARSE_CSR then we use array_segments and
+./compiler/tf_schema.fbs:  //     non-zero elements within this dimension (as those in the CSR matrix
+./compiler/tf_schema.fbs:  // conceptual dense tensor. For a n-dimensional tensors with dims (d0, d1,
+./compiler/tf_schema.fbs:  // ..., dn-1),
+./compiler/tf_schema.fbs:  //   - if not block sparse, the traversal_order is just a permutation of (d0,
+./compiler/tf_schema.fbs:  //     ..., dn-1). For example, a 2-D matrix stored in row-major order would
+./compiler/tf_schema.fbs:  //   - if block sparse with a k-dimensional block (0 <= k <= n), the
+./compiler/tf_schema.fbs:  //     permutation of (d0, ..., dn-1). The lask k elements are a permutation
+./compiler/tf_schema.fbs:  //     of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+./compiler/tf_schema.fbs:  //     example, a 2-D matrix with 2-D blocks, both stored in row-major order
+./compiler/tf_schema.fbs:  // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+./compiler/tf_schema.fbs:  // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+./compiler/tf_schema.fbs:  // It's stored in the order of (dn, ..., dn+k-1).
+./compiler/tf_schema.fbs:  // If not block-sparse, this field is NULL.
+./compiler/tf_schema.fbs:  // each dimension to locate the non-zero values in the original dense tensor.
+./compiler/tf_schema.fbs:  // The tensor shape. The meaning of each entry is operator-specific but
+./compiler/tf_schema.fbs:  // target device is little-endian. In addition, all builtin operators assume
+./compiler/tf_schema.fbs:  // represented with -1.
+./compiler/tf_schema.fbs:  // non-VARIANT types. This is optional because the nested type can be omitted.
+./compiler/tf_schema.fbs:  // For weights-only quantization, use asymmetric quantization for non
+./compiler/tf_schema.fbs:  // If set to true, then weights-only op will use asymmetric quantization for
+./compiler/tf_schema.fbs:  // If set to true, then weights-only op will use asymmetric quantization for
+./compiler/tf_schema.fbs:  // Optional input are indicated by -1.
+./compiler/tf_schema.fbs:  // the list of non-static tensors that feed into the subgraph for inference.
+./compiler/tf_schema.fbs:// by index. The generous alignment accommodates mmap-friendly data structures.
+./compiler/input_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/input_realizer.cpp:    [](auto &node) { return std::pair(node->getName(), node.get()); });
+./compiler/input_realizer.cpp:    auto num_connection = node->getNumInputConnections();
+./compiler/input_realizer.cpp:      node->setProperty({"input_layers=" + ic.toString()});
+./compiler/input_realizer.cpp:      node->setInputConnectionName(sc.getIndex(), ic.getName());
+./compiler/input_realizer.cpp:      node->setInputConnectionIndex(sc.getIndex(), ic.getIndex());
+./compiler/meson.build:if get_option('enable-tflite-interpreter')
+./compiler/meson.build:    error('Tensorflow2-Lite dependency not found')
+./compiler/meson.build:  flat_header = custom_target('tflite-schema',
+./compiler/meson.build:                               command: [flatc_prog, '-o', '@OUTDIR@', '-c', '@INPUT@'])
+./compiler/meson.build:  flat_header2 = custom_target('nntrainer-schema',
+./compiler/meson.build:                               command: [flatc_prog, '-o', '@OUTDIR@', '-c', '@INPUT@'])
+./compiler/slice_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/slice_realizer.cpp:  this->start_layers.reserve(start_layers.size());
+./compiler/slice_realizer.cpp:                 std::back_inserter(this->start_layers),
+./compiler/slice_realizer.cpp:                 std::inserter(this->end_layers, this->end_layers.begin()),
+./compiler/slice_realizer.cpp:    LayerNode *operator->() { return node.get(); }
+./compiler/slice_realizer.cpp:      return std::pair<std::string, NodeInfo>(node->getName(), node);
+./compiler/slice_realizer.cpp:                  auto node_name = node->getName();
+./compiler/slice_realizer.cpp:                  for (auto i = 0u, num_node = node->getNumInputConnections();
+./compiler/slice_realizer.cpp:                    const auto &parent = node->getInputConnectionName(i);
+./compiler/slice_realizer.cpp:      if (node.second.node->getNumInputConnections() == 0) {
+./compiler/slice_realizer.cpp:        cur_start_layers.push_back(node.second.node->getName());
+./compiler/slice_realizer.cpp:  /** dfs function to perform depth-first search recursively with tracking */
+./compiler/slice_realizer.cpp:    if (mp[node->getName()].to_be_added) {
+./compiler/remap_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/remap_realizer.cpp:    remap_connection_fn ? node->remapConnections(remap_connection_fn)
+./compiler/remap_realizer.cpp:                        : node->remapIdentifiers(remap_fn);
+./compiler/flatten_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/flatten_realizer.cpp:    if (node->getFlatten() && !node->getDistribute()) {
+./compiler/flatten_realizer.cpp:      node->setProperty({"flatten=false"});
+./compiler/flatten_realizer.cpp:      auto layer_name = node->getName();
+./compiler/flatten_realizer.cpp:      flatten_node->setProperty({"input_layers=" + temp_name});
+./compiler/flatten_realizer.cpp:        name = iter->second;
+./compiler/flatten_realizer.cpp:        name = iter->second;
+./compiler/activation_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/activation_realizer.cpp:    if (node->getType() == ActivationLayer::type) {
+./compiler/activation_realizer.cpp:      /// node->getActivationToBeRealized() but explicitly stated in order to
+./compiler/activation_realizer.cpp:    if (auto act = node->getActivationToBeRealized();
+./compiler/activation_realizer.cpp:        << "unknown activation type for layer: " << node->getName();
+./compiler/activation_realizer.cpp:      auto layer_name = node->getName();
+./compiler/activation_realizer.cpp:      node->setProperty({"activation=none"});
+./compiler/activation_realizer.cpp:      act_node->setProperty({"input_layers=" + temp_name});
+./compiler/activation_realizer.cpp:        name = iter->second;
+./compiler/activation_realizer.cpp:        name = iter->second;
+./compiler/bn_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/previous_input_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/previous_input_realizer.cpp:    if (node->getNumInputConnections() != 0) {
+./compiler/previous_input_realizer.cpp:      << node->getName();
+./compiler/previous_input_realizer.cpp:    auto &prev_node = *(iter - 1);
+./compiler/previous_input_realizer.cpp:      "%s is identified as a non-input node and default input layer(%s) is "
+./compiler/previous_input_realizer.cpp:      node->getName().c_str(), prev_node->getName().c_str());
+./compiler/previous_input_realizer.cpp:    node->setProperty({"input_layers=" + prev_node->getName()});
+./compiler/flatbuffer_opnode.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/tflite_interpreter.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/tflite_interpreter.cpp:    while (pred->isVirtualNode()) {
+./compiler/tflite_interpreter.cpp:      assert(pred->arity() == 1);
+./compiler/tflite_interpreter.cpp:      pred = pred->arg(0);
+./compiler/tflite_interpreter.cpp:    return search->second;
+./compiler/tflite_interpreter.cpp:  std::unordered_map<KeyType, unsigned int> key2index; /**< key -> index map */
+./compiler/tflite_interpreter.cpp:  std::vector<DataType> index2data;                    /**< index -> data map */
+./compiler/tflite_interpreter.cpp:        const float *buf = variable->getData();
+./compiler/tflite_interpreter.cpp:        auto byte_size = dynamic ? 0 : variable->bytes();
+./compiler/tflite_interpreter.cpp:      [&tensors = this->tensors](const TfOpNode::Variables &variables) {
+./compiler/tflite_interpreter.cpp:      if (op_node->isVirtualNode())
+./compiler/tflite_interpreter.cpp:      update_opcode(op_node->getOpType());
+./compiler/tflite_interpreter.cpp:      if (op_node->isInputNode()) {
+./compiler/tflite_interpreter.cpp:        register_tensors(op_node->getInputs());
+./compiler/tflite_interpreter.cpp:         *    2. Transpose operator has two inputs - input to be
+./compiler/tflite_interpreter.cpp:        assert(op_node->getInputs()[0]->getData() == nullptr);
+./compiler/tflite_interpreter.cpp:        update_buffer_map({op_node->getInputs()[1]}, false);
+./compiler/tflite_interpreter.cpp:      register_tensors(op_node->getWeights());
+./compiler/tflite_interpreter.cpp:      update_buffer_map(op_node->getWeights(), false);
+./compiler/tflite_interpreter.cpp:      register_tensors(op_node->getOutputs());
+./compiler/tflite_interpreter.cpp:      update_buffer_map(op_node->getOutputs(), true);
+./compiler/tflite_interpreter.cpp:        if (variable->getName().find("nntrainer_internal_perm") !=
+./compiler/tflite_interpreter.cpp:        v.push_back(this->getTensorIndex(variable));
+./compiler/tflite_interpreter.cpp:      if (op_node->isVirtualNode())
+./compiler/tflite_interpreter.cpp:      if (op_node->isInputNode()) {
+./compiler/tflite_interpreter.cpp:        update_model_io_to(op_node->getInputs(), inputs);
+./compiler/tflite_interpreter.cpp:      if (op_node->isOutputNode()) {
+./compiler/tflite_interpreter.cpp:        update_model_io_to(op_node->getOutputs(), outputs);
+./compiler/tflite_interpreter.cpp:      << FUNC_TAG << "Cannot find index for tensor: " << tensor->getName();
+./compiler/tflite_interpreter.cpp:    ln->exportTo(e, ml::train::ExportMethods::METHOD_TFLITE);
+./compiler/tflite_interpreter.cpp:   * : Input -> Conv -> Conv -> Flatten -> [FC]:local_first
+./compiler/tflite_interpreter.cpp:   * : Input -> Conv -> Flatten -> [FC]:local_first -> Conv -> Flatten ->
+./compiler/tflite_interpreter.cpp:    if (tf_node->getOptionType() ==
+./compiler/tflite_interpreter.cpp:      tf_node->setNeedReorderWeight();
+./compiler/tflite_interpreter.cpp:        tf_node->getOptionType() !=
+./compiler/tflite_interpreter.cpp:    auto layer_node = searched_layer->second;
+./compiler/tflite_interpreter.cpp:    auto layer_node_inputs = layer_node->getInputConnections();
+./compiler/tflite_interpreter.cpp:    /// assume that the TfOpNode and the LayerNode have a one-to-one
+./compiler/tflite_interpreter.cpp:    tf_node->arity(layer_node_inputs.size());
+./compiler/tflite_interpreter.cpp:          return istrequal(node.get()->getName(), input_layer_name);
+./compiler/tflite_interpreter.cpp:        auto input_layer_node = input_layer_node_iterator->get();
+./compiler/tflite_interpreter.cpp:          tf_node->setArg(index, layer_to_tf.find(input_layer_node)->second);
+./compiler/tflite_interpreter.cpp:    if (tf_node->getOptionType() ==
+./compiler/tflite_interpreter.cpp:      tf_node->weightReorder(node_count);
+./compiler/tflite_interpreter.cpp:  auto graph_input_offset = map.getInputs().size() - 1;
+./compiler/tflite_interpreter.cpp:    auto dim = var->getDim();
+./compiler/tflite_interpreter.cpp:    /// change this var->getName when tensor have it's own name
+./compiler/tflite_interpreter.cpp:    auto name = fbb.CreateString("nntrainer_converted" + var->getName());
+./compiler/tflite_interpreter.cpp:      var->getData() == nullptr
+./compiler/tflite_interpreter.cpp:        ? buffer_map.getData().size() - graph_input_offset--
+./compiler/tflite_interpreter.cpp:        : buffer_map.getIndex(var->getData());
+./compiler/tflite_interpreter.cpp:    if (var->getName().find("nntrainer_internal_perm") != std::string::npos) {
+./compiler/tflite_interpreter.cpp:        for (auto parent_out : parent_node->getOutputs()) {
+./compiler/tflite_interpreter.cpp:            /// but it has been allocated (parent_out->getData()). But, the
+./compiler/tflite_interpreter.cpp:            /// (in->getData()).
+./compiler/tflite_interpreter.cpp:            if (parent_out->getData() == in->getData() ||
+./compiler/tflite_interpreter.cpp:                (in->getData() == nullptr && parent_out->getData())) {
+./compiler/tflite_interpreter.cpp:    if (node->isVirtualNode())
+./compiler/ini_interpreter.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/tflite_opnode.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/tflite_opnode.h:   * @brief Reorder Weight in case of NCHW --> NHWC
+./compiler/tflite_opnode.h:   * @return false if layer(OpNode) non-trainable
+./compiler/tflite_opnode.h:   * @brief Set n-th argument of the node
+./compiler/tflite_opnode.h:   * @brief Get n-th argument of the node
+./compiler/multiout_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/multiout_realizer.cpp:    NNTR_THROW_IF(node_names.count(node->getName()), std::invalid_argument)
+./compiler/multiout_realizer.cpp:      << "node name clashes: " << node->getName();
+./compiler/multiout_realizer.cpp:    node_names.emplace(node->getName());
+./compiler/multiout_realizer.cpp:    for (unsigned int i = 0, num_nodes = node->getNumInputConnections();
+./compiler/multiout_realizer.cpp:      Connection c(node->getInputConnectionName(i),
+./compiler/multiout_realizer.cpp:                   node->getInputConnectionIndex(i));
+./compiler/multiout_realizer.cpp:      iter->second++;
+./compiler/multiout_realizer.cpp:    auto ranges = multiout_nodes.equal_range(node->getName());
+./compiler/multiout_realizer.cpp:      ret.push_back(it->second);
+./compiler/loss_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/loss_realizer.cpp:    [](auto &node) { return std::pair(node->getName(), node.get()); });
+./compiler/loss_realizer.cpp:    if (loss_type.find(node->getType()) != loss_type.end()) {
+./compiler/loss_realizer.cpp:    assert(loss_node->getNumInputConnections() == 1);
+./compiler/loss_realizer.cpp:    auto &input_name = loss_node->getInputConnectionName(SINGLE_IN_IDX);
+./compiler/loss_realizer.cpp:    for (unsigned int i = 0; i < input_node->getNumOutputConnections(); ++i) {
+./compiler/loss_realizer.cpp:      if (istrequal(loss_node->getName(),
+./compiler/loss_realizer.cpp:                    input_node->getOutputConnection(i)->getName())) {
+./compiler/loss_realizer.cpp:        assert(loss_node->getOutputConnections().size() == 0);
+./compiler/loss_realizer.cpp:        input_node->setOutputLayers({});
+./compiler/loss_realizer.cpp:    if (loss_type.find(node->getType()) == loss_type.end()) {
+./compiler/compiler_fwd.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/bn_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/bn_realizer.cpp:    [](auto &node) { return std::pair(node->getName(), node.get()); });
+./compiler/bn_realizer.cpp:    if (istrequal(node->getType(), "batch_normalization")) {
+./compiler/bn_realizer.cpp:    auto &input_name = node->getInputConnectionName(SINGLE_INOUT_IDX);
+./compiler/bn_realizer.cpp:    for (unsigned int i = 0; i < input_node->getNumOutputConnections(); ++i) {
+./compiler/bn_realizer.cpp:      if (istrequal(node->getName(),
+./compiler/bn_realizer.cpp:                    input_node->getOutputConnection(i)->getName())) {
+./compiler/bn_realizer.cpp:        input_node->setOutputConnection(
+./compiler/bn_realizer.cpp:          i, node->getOutputConnection(SINGLE_INOUT_IDX)->getName(),
+./compiler/bn_realizer.cpp:    auto &output_name = node->getOutputConnection(SINGLE_INOUT_IDX)->getName();
+./compiler/bn_realizer.cpp:    for (unsigned int i = 0; i < output_node->getNumInputConnections(); ++i) {
+./compiler/bn_realizer.cpp:      if (istrequal(node->getName(), output_node->getInputConnectionName(i))) {
+./compiler/bn_realizer.cpp:        output_node->setInputConnectionName(
+./compiler/bn_realizer.cpp:          i, node->getInputConnectionName(SINGLE_INOUT_IDX));
+./compiler/bn_realizer.cpp:    if (!istrequal(node->getType(), "batch_normalization")) {
+./compiler/nntrainer_schema.fbs://Tensor Mapping : name - index
+./compiler/recurrent_realizer.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/recurrent_realizer.cpp:                 std::inserter(this->input_layers, this->input_layers.begin()),
+./compiler/recurrent_realizer.cpp:      iter->second = std::max(iter->second, idx);
+./compiler/recurrent_realizer.cpp:    return node->getType() == ZoneoutLSTMCellLayer::type;
+./compiler/recurrent_realizer.cpp:    node->setProperty({"max_timestep=" + std::to_string(max_time_step),
+./compiler/recurrent_realizer.cpp:        node->setProperty({"shared_from=" + node->getName()});
+./compiler/recurrent_realizer.cpp:      auto new_node = node->cloneConfiguration();
+./compiler/recurrent_realizer.cpp:      new_node->remapIdentifiers(
+./compiler/recurrent_realizer.cpp:      /// 2. override first output name to $name/$idx - 1
+./compiler/recurrent_realizer.cpp:        if (node->getName() != recurrent_input.getName() + "/0") {
+./compiler/recurrent_realizer.cpp:        new_node->setInputConnectionIndex(recurrent_input.getIndex(),
+./compiler/recurrent_realizer.cpp:        new_node->setInputConnectionName(recurrent_input.getIndex(),
+./compiler/recurrent_realizer.cpp:                                           std::to_string(time_idx - 1));
+./compiler/recurrent_realizer.cpp:      new_node->setProperty({"shared_from=" + node->getName()});
+./compiler/recurrent_realizer.cpp:          auto last_layer_name = name + "/" + std::to_string(unroll_for - 1);
+./compiler/loss_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/input_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/input_realizer.h: * @brief Graph realizer class which remaps input from start -> input layers
+./compiler/ini_interpreter.cpp:// SPDX-License-Identifier: Apache-2.0
+./compiler/ini_interpreter.cpp:  properties.reserve(num_entries - 1);
+./compiler/ini_interpreter.cpp://   auto g = graph->getUnsortedLayers(input_layer, output_layer);
+./compiler/ini_interpreter.cpp://     lnode->setProperty({"trainable=" + trainable});
+./compiler/ini_interpreter.cpp://     //   layer->weight_initializer = Tensor::Initializer::FILE_INITIALIZER;
+./compiler/ini_interpreter.cpp://     //   layer->bias_initializer = Tensor::Initializer::FILE_INITIALIZER;
+./compiler/ini_interpreter.cpp://     //   layer->initializer_file = backbone.save_path;
+./compiler/ini_interpreter.cpp://   g[0]->setProperty({"input_shape=" + input_shape});
+./compiler/ini_interpreter.cpp://   g[0]->setProperty({"input_layers=" + input_layers});
+./compiler/ini_interpreter.cpp:    IniSection s = IniSection::FromExportable(ln->getName(), *ln);
+./compiler/ini_interpreter.cpp:    s.setEntry("type", ln->getType());
+./compiler/ini_interpreter.cpp:    ml_logi("not-allowed property for the layer throws error");
+./compiler/ini_interpreter.cpp:        auto bg = this->deserialize(backbone);
+./compiler/ini_interpreter.cpp:          node->setProperty({"trainable=" + trainable});
+./compiler/activation_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/flatbuffer_interpreter.h:// SPDX-License-Identifier: Apache-2.0
+./compiler/previous_input_realizer.h:// SPDX-License-Identifier: Apache-2.0
+./dataset/raw_file_data_producer.h:// SPDX-License-Identifier: Apache-2.0
+./dataset/databuffer_factory.h:// SPDX-License-Identifier: Apache-2.0
+./dataset/databuffer.cpp: *   http://www.apache.org/licenses/LICENSE-2.0
+./dataset/databuffer.cpp:  auto generator = producer->finalize(input_dims, label_dims);
+./dataset/databuffer.cpp:  auto size = producer->size(input_dims, label_dims);
+./dataset/databuffer.cpp:        iq->notifyEndOfRequestEmpty();
+./dataset/databuffer.cpp:        auto sample_view = iq->requestEmptySlot();
+./dataset/databuffer.cpp:      auto sample_view = iq->requestEmptySlot();
+./dataset/databuffer.cpp:  return iq->requestFilledSlot();
+./dataset/databuffer.cpp:  return {producer->finalize(input_dims, label_dims),
+./dataset/databuffer.cpp:          producer->size(input_dims, label_dims)};
+./dataset/databuffer.cpp:    int pad_left = (barWidth - len) / 2;
+./dataset/databuffer.cpp:    int pad_right = barWidth - pad_left - len;
+./dataset/databuffer.cpp:    producer->setProperty(left);
+./dataset/databuffer.cpp:  return producer->getType();
+./dataset/databuffer.cpp:    producer->exportTo(exporter, method);
+./dataset/databuffer.cpp:  /// @todo this should be query from producer->isSerializable
+./dataset/databuffer.cpp:  if (producer->getType() == FuncDataProducer::type) {
+./dataset/func_data_producer.cpp:// SPDX-License-Identifier: Apache-2.0
+./dataset/func_data_producer.cpp:  NNTR_THROW_IF(!this->cb, std::invalid_argument)
+./dataset/func_data_producer.cpp:  return [cb = this->cb, ud = this->user_data_prop->get(), input_data,
+./dataset/func_data_producer.cpp:                      std::vector<Tensor> &labels) -> bool {
+./dataset/iteration_queue.h:// SPDX-License-Identifier: Apache-2.0
+./dataset/iteration_queue.h: * @brief Thread Safe Queue implementation dedicated for the non-owing pointer
+./dataset/iteration_queue.h: * - requestEmptySlot() will give a ScopedView<sample>
+./dataset/iteration_queue.h: * - requestFilledSlot() will give a ScopedView<Iteration>
+./dataset/iteration_queue.h:     * @param iterator non-inclusive iterator to mark the last
+./dataset/iteration_queue.cpp:// SPDX-License-Identifier: Apache-2.0
+./dataset/iteration_queue.cpp:      current_iterator + 1 == being_filled->get().end()) {
+./dataset/iteration_queue.cpp:    being_filled->reset();
+./dataset/iteration_queue.cpp:    current_iterator = being_filled->get().begin();
+./dataset/iteration_queue.cpp:                       [current_being_filed = this->being_filled] {
+./dataset/iteration_queue.cpp:                         current_being_filed->markSampleFilled();
+./dataset/iteration_queue.cpp:                       [this, current_being_filled = this->being_filled] {
+./dataset/iteration_queue.cpp:                         this->markEmpty(current_being_filled);
+./dataset/iteration_queue.cpp:                         num_being_filled--;
+./dataset/iteration_queue.cpp:    &iteration->get(), [this, iteration] { markEmpty(iteration); },
+./dataset/iteration_queue.cpp:  /// we have to defined ordering of having stop_requested -> push nullptr to
+./dataset/iteration_queue.cpp:  /// filled_q -> stopped so when the case of changing to stopped it has to push
+./dataset/iteration_queue.cpp:    being_filled->setEndSample(current_iterator + 1);
+./dataset/iteration_queue.cpp:  num_being_filled--;
+./dataset/iteration_queue.cpp:  std::scoped_lock lock(this->notify_mutex, rhs.notify_mutex);
+./dataset/iteration_queue.cpp:    iq->markFilled(this);
+./dataset/iteration_queue.cpp:    NNTR_THROW_IF_CLEANUP(iq->empty_mutex.try_lock(), std::runtime_error,
+./dataset/iteration_queue.cpp:                          [this] { iq->empty_mutex.unlock(); })
+./dataset/iteration_queue.cpp:    /// warning: iq has to be locked with iq->empty_mutex
+./dataset/iteration_queue.cpp:    iq->num_being_filled--;
+./dataset/iteration_queue.cpp:    iq->filled_q.push(this);
+./dataset/iteration_queue.cpp:    iq->notify_emptied_cv.notify_all();
+./dataset/data_iteration.cpp:// SPDX-License-Identifier: Apache-2.0
+./dataset/func_data_producer.h:// SPDX-License-Identifier: Apache-2.0
+./dataset/random_data_producers.h:// SPDX-License-Identifier: Apache-2.0
+./dataset/dir_data_producers.h:// SPDX-License-Identifier: Apache-2.0
+./dataset/raw_file_data_producer.cpp:// SPDX-License-Identifier: Apache-2.0
+./dataset/raw_file_data_producer.cpp:    return idx == sz - 1;
+./dataset/databuffer.h: *   http://www.apache.org/licenses/LICENSE-2.0
+./dataset/dir_data_producers.cpp:// SPDX-License-Identifier: Apache-2.0
+./dataset/dir_data_producers.cpp: * remain as TODO. ( BGR --> RGB )
+./dataset/dir_data_producers.cpp:    return idx == sz - 1;
+./dataset/data_producer.h:// SPDX-License-Identifier: Apache-2.0
+./dataset/data_producer.h:   * @param[in] index current index with range of [0, size() - 1]. If
+./dataset/random_data_producers.cpp:// SPDX-License-Identifier: Apache-2.0
+./dataset/random_data_producers.cpp:                     0, label_dim.width() - 1);
+./dataset/random_data_producers.cpp:           std::vector<Tensor> &labels) mutable -> bool {
+./dataset/random_data_producers.cpp:    return idx == sz - 1;
+./dataset/databuffer_factory.cpp:// SPDX-License-Identifier: Apache-2.0
+./dataset/data_iteration.h:// SPDX-License-Identifier: Apache-2.0
+./dataset/data_iteration.h:   * @note @a iteration must be non-inclusive
+./delegate.h:// SPDX-License-Identifier: Apache-2.0
+./delegate.h:  void setDevice(DeviceType device) { this->device = device; }
+./delegate.h:    this->soft_placement = soft_placement;
+./delegate.h:    this->precision_loss = precision_loss;
+./graph/connection.cpp:// SPDX-License-Identifier: Apache-2.0
+./graph/connection.cpp:    auto idx_part = std::string(sr.begin() + pos + 1, sr.end() - 1);
+./graph/network_graph.h:// SPDX-License-Identifier: Apache-2.0
+./graph/network_graph.h:    tensor_manager->deallocateTensors(dealloc_weights);
+./graph/network_graph.h:    tensor_manager->allocateWeights(
+./graph/network_graph.h:      std::get<3>(backward_iter_end->getExecutionOrder()));
+./graph/network_graph.h:  void deallocateWeights() { tensor_manager->deallocateWeights(); }
+./graph/network_graph.h:    tensor_manager->setOptimizations(val);
+./graph/network_graph.h:   * name pre-assigned to the layer can be changed if force_rename is enabled.
+./graph/network_graph.h:   * @brief     Optimize the graph memory utilization for in-place operations
+./graph/network_graph.h:   * @brief     Check if the given node can execute in-place
+./graph/network_graph.h:   * @param lnode node to check for in-place execution
+./graph/network_graph.cpp:// SPDX-License-Identifier: Apache-2.0
+./graph/network_graph.cpp: * @todo    Support multi-input graph.
+./graph/network_graph.cpp:  forward_iter_end = (*(cend() - 1)).get();
+./graph/network_graph.cpp:    auto order_idx = getBackwardingEndIter() - iter - 1;
+./graph/network_graph.cpp:    if (node->getTrainable())
+./graph/network_graph.cpp:    if (node->getTrainable())
+./graph/network_graph.cpp:    node->setExecutionOrder({forward_order, calc_gradient_order,
+./graph/network_graph.cpp:  graph_exec_end = std::get<3>((*(cbegin()))->getExecutionOrder());
+./graph/network_graph.cpp:    if (output_layer_node->requireLabel())
+./graph/network_graph.cpp:      auto type = output_layer_node->getType();
+./graph/network_graph.cpp:      switch (output_layer_node->getActivationType()) {
+./graph/network_graph.cpp:        LNODE(graph.getNode(output_layer_node->getInputConnectionName(0)));
+./graph/network_graph.cpp:    if (second_to_last_layer_node->getDistribute()) {
+./graph/network_graph.cpp:      lnode->setProperty({"distribute=true"});
+./graph/network_graph.cpp:    second_to_last_layer_node->setOutputLayers({lnode->getName()});
+./graph/network_graph.cpp:    lnode->setProperty(
+./graph/network_graph.cpp:      {"input_layers=" + second_to_last_layer_node->getName()});
+./graph/network_graph.cpp:    for (auto i = 0u, num_inode = node->getNumInputConnections(); i < num_inode;
+./graph/network_graph.cpp:      const auto &name = node->getInputConnectionName(i);
+./graph/network_graph.cpp:      const auto &idx = node->getInputConnectionIndex(i);
+./graph/network_graph.cpp:      node_setting_output->setOutputConnection(idx, node->getName(), i);
+./graph/network_graph.cpp:    if (lnode->getNumInputConnections() == 0) {
+./graph/network_graph.cpp:      if (!lnode->hasInputShapeProperty()) {
+./graph/network_graph.cpp:    if (lnode->getTrainable() ||
+./graph/network_graph.cpp:        must_support_backwarding.find(lnode->getName()) !=
+./graph/network_graph.cpp:      if (lnode->getTrainable()) {
+./graph/network_graph.cpp:        lnode->needsCalcGradient(true);
+./graph/network_graph.cpp:      if (lnode->supportBackwarding() && !optimize_memory) {
+./graph/network_graph.cpp:        lnode->needsCalcDerivative(true);
+./graph/network_graph.cpp:      for (auto i = 0u, num_node = lnode->getNumOutputConnections();
+./graph/network_graph.cpp:        auto conn = lnode->getOutputConnection(i);
+./graph/network_graph.cpp:        must_support_backwarding.insert(conn->getName());
+./graph/network_graph.cpp:    ln->needsCalcDerivative(true);
+./graph/network_graph.cpp:  if (batch_size == this->batch_size)
+./graph/network_graph.cpp:  this->batch_size = batch_size;
+./graph/network_graph.cpp:  auto allocated = tensor_manager->isAllocated();
+./graph/network_graph.cpp:    if ((*iter)->isFinalized()) {
+./graph/network_graph.cpp:      const RunLayerContext &context = (*iter)->getRunContext();
+./graph/network_graph.cpp:        tensor_manager->setBatchSize(ts.getName(), ts.getDim().batch());
+./graph/network_graph.cpp:          tensor_manager->setBatchSize(ts_grad.getName(),
+./graph/network_graph.cpp:      (*iter)->setBatch(batch_size);
+./graph/network_graph.cpp:  tensor_manager->setBatchSize(batch_size);
+./graph/network_graph.cpp:    input_dims[idx] = tensor_manager->getTensor(input_list[idx])->getDim();
+./graph/network_graph.cpp:    label_dims[idx] = tensor_manager->getTensor(label_list[idx])->getDim();
+./graph/network_graph.cpp:  if (!node->getTrainable())
+./graph/network_graph.cpp:  TRACE_MEMORY() << node->getName() + ": AG";
+./graph/network_graph.cpp:  TRACE_TIME() << node->getName() + ": AG";
+./graph/network_graph.cpp:  auto &rc = node->getRunContext();
+./graph/network_graph.cpp:    PROFILE_TIME_START(profile_keys.at(ln->getType()));
+./graph/network_graph.cpp:    PROFILE_TIME_END(profile_keys.at(ln->getType()));
+./graph/network_graph.cpp:    for (unsigned int j = 0; j < output_layer_node->getNumOutputs(); ++j) {
+./graph/network_graph.cpp:      out.push_back(MAKE_SHARED_TENSOR(output_layer_node->getOutput(j)));
+./graph/network_graph.cpp:  if (lptr_begin->requireLabel() == false)
+./graph/network_graph.cpp:    PROFILE_TIME_START(profile_keys.at(ln->getType()));
+./graph/network_graph.cpp:    PROFILE_TIME_END(profile_keys.at(ln->getType()));
+./graph/network_graph.cpp:    global_norm_data[idx] = w->getGradientNorm();
+./graph/network_graph.cpp:    w->clipGradientByGlobalNorm(global_norm);
+./graph/network_graph.cpp:  int max_exec_order = -1;
+./graph/network_graph.cpp:    const auto &exec_order = ln->getExecutionOrder();
+./graph/network_graph.cpp:    if (ln->needsCalcDerivative() || ln->needsCalcGradient()) {
+./graph/network_graph.cpp:      << "layer node: " << ln->getName()
+./graph/network_graph.cpp:    tensor_manager->allocateTensors(
+./graph/network_graph.cpp:      std::get<0>((*(cend() - 1))->getExecutionOrder()));
+./graph/network_graph.cpp:    tensor_manager->allocateTensors(
+./graph/network_graph.cpp:      std::get<3>(backward_iter_end->getExecutionOrder()));
+./graph/network_graph.cpp:      if ((*iter)->getName() != output_layer)
+./graph/network_graph.cpp:         iter != graph.cend() - num_layers_remove_end; iter++) {
+./graph/network_graph.cpp:      if ((*iter)->getName() != input_layer)
+./graph/network_graph.cpp:                 graph.cend() - num_layers_remove_end, std::back_inserter(ret),
+./graph/network_graph.cpp:  if (!lnode->supportInPlace())
+./graph/network_graph.cpp:  /** layers which behave as a no-op - flatten */
+./graph/network_graph.cpp:    return lnode->getType() == FlattenLayer::type ||
+./graph/network_graph.cpp:           lnode->getType() == IdentityLayer::type;
+./graph/network_graph.cpp:  /** layers which behave as a no-op but shares memory among parallel nodes -
+./graph/network_graph.cpp:    return lnode->getType() == MultiOutLayer::type;
+./graph/network_graph.cpp:   * derivatives and weights, if any - batch normalization
+./graph/network_graph.cpp:      return (lnode->getType() == BatchNormalizationLayer::type) ||
+./graph/network_graph.cpp:             (lnode->getType() == LayerNormalizationLayer::type);
+./graph/network_graph.cpp:   * @note Conditions to decide if this layer node can be in-place:
+./graph/network_graph.cpp:   * 1. if the layer is a no-op, then it can operate in-place as it is not
+./graph/network_graph.cpp:   * layer will be non-restricting.
+./graph/network_graph.cpp:  if (no_op(lnode) || !lnode->supportBackwarding()) {
+./graph/network_graph.cpp:    for (auto i = 0u, num_node = lnode->getNumInputConnections(); i < num_node;
+./graph/network_graph.cpp:      const auto &input_name = lnode->getInputConnectionName(i);
+./graph/network_graph.cpp:      if (getLayerNode(input_name)->executeInPlace() == InPlace::RESTRICTING)
+./graph/network_graph.cpp:   * @note Conditions to decide if this layer node can be in-place:
+./graph/network_graph.cpp:   * if the layer is a no-op-shared, then it can operate in-place as it is not
+./graph/network_graph.cpp:   * @note Conditions to decide if this layer node can be in-place:
+./graph/network_graph.cpp:   * This is a generic case where the layer can support in-place but will
+./graph/network_graph.cpp:   * modify its input in-place. This includes layers like activation, etc.
+./graph/network_graph.cpp:   * Apply checks below to ensure that the layers can work in-place:
+./graph/network_graph.cpp:   * - if any of the input layer are restriction, then this layer cannot work
+./graph/network_graph.cpp:   * - if all of the input layers are either not inplace or have no
+./graph/network_graph.cpp:   * restrictions, then this layer can operate in-place.
+./graph/network_graph.cpp:   * work in-place such as concat layer, split layer, addition layer, dropout
+./graph/network_graph.cpp:   * @todo This logic sets layers to in-place one-by-one as they arrive. However
+./graph/network_graph.cpp:   * setting some layers to in-place can save more memory than others (like
+./graph/network_graph.cpp:   * memory save they provide and then make them in-place in that order.
+./graph/network_graph.cpp:  if (lnode->getType() == ActivationLayer::type ||
+./graph/network_graph.cpp:      lnode->getType() == BatchNormalizationLayer::type ||
+./graph/network_graph.cpp:      lnode->getType() == LayerNormalizationLayer::type) {
+./graph/network_graph.cpp:    for (auto i = 0u, num_node = lnode->getNumInputConnections(); i < num_node;
+./graph/network_graph.cpp:      if (getLayerNode(lnode->getInputConnectionName(i))->executeInPlace() ==
+./graph/network_graph.cpp:     * is not required during backwarding, then it is a non-restricting in-place
+./graph/network_graph.cpp:      lnode->executeInPlace(canExecuteInPlace(lnode));
+./graph/network_graph.cpp:  if (lnode->getType() == MultiOutLayer::type) {
+./graph/network_graph.cpp:   * @todo for layers which support in-place, both variables and gradients
+./graph/network_graph.cpp:   * in-place or not
+./graph/network_graph.cpp:                 [](const Var_Grad *vg) { return vg->getDim(); });
+./graph/network_graph.cpp:  auto init_context = lnode->finalize(input_dims);
+./graph/network_graph.cpp:   * Request manager for either a pre-allocated output as input or a newly
+./graph/network_graph.cpp:                 [](auto const &vg) { return vg->getName(); });
+./graph/network_graph.cpp:  const std::vector<Var_Grad *> &inputs = tensor_manager->requestInputs(
+./graph/network_graph.cpp:  /** In-Place optimizations */
+./graph/network_graph.cpp:   * Request manager for either a pre-allocated input as output or a newly
+./graph/network_graph.cpp:   * node is going to be used with in-place optimizations.
+./graph/network_graph.cpp:  if (lnode->executeInPlace() != InPlace::NONE) {
+./graph/network_graph.cpp:        if (lnode->getType() == IdentityLayer::type) {
+./graph/network_graph.cpp:          s.variable_spec.reference_name = inputs[i]->getName();
+./graph/network_graph.cpp:          s.variable_spec.reference_name = inputs[0]->getName();
+./graph/network_graph.cpp:        s.gradient_spec->request_type =
+./graph/network_graph.cpp:        if (lnode->getType() == IdentityLayer::type) {
+./graph/network_graph.cpp:          s.gradient_spec->reference_name = inputs[i]->getGradientName();
+./graph/network_graph.cpp:          s.gradient_spec->reference_name = inputs[0]->getGradientName();
+./graph/network_graph.cpp:  if (lnode->requireLabel()) {
+./graph/network_graph.cpp:      << lnode->getName() << " out spec size: " << out_specs.size();
+./graph/network_graph.cpp:      << "label space does not exist for " << lnode->getName();
+./graph/network_graph.cpp:    out_specs[0].gradient_spec->request_type =
+./graph/network_graph.cpp:  if (lnode->getOutputConnections().size() == 0u) {
+./graph/network_graph.cpp:                      std::get<0>(forward_iter_end->getExecutionOrder()));
+./graph/network_graph.cpp:  if (lnode->getType() == RNNCellLayer::type or
+./graph/network_graph.cpp:      lnode->getType() == LSTMCellLayer::type or
+./graph/network_graph.cpp:      lnode->getType() == GRUCellLayer::type) {
+./graph/network_graph.cpp:  const std::vector<Var_Grad *> &outputs = tensor_manager->requestTensors(
+./graph/network_graph.cpp:    out_specs, Manager::TensorGroupType::OUTPUT, lnode->getExecutionOrder(),
+./graph/network_graph.cpp:    lnode->getName());
+./graph/network_graph.cpp:  if (auto shared_node_str = lnode->getSharedFrom(); !shared_node_str.empty()) {
+./graph/network_graph.cpp:    //   << shared_node_str << " requested from " << lnode->getName();
+./graph/network_graph.cpp:    // NNTR_THROW_IF(shared_node->getType() != lnode->getType(),
+./graph/network_graph.cpp:    //   << shared_node->getType() << " depedent node type: " <<
+./graph/network_graph.cpp:    //   lnode->getType()
+./graph/network_graph.cpp:    //   << " depedent node name: " << lnode->getName();
+./graph/network_graph.cpp:    // NNTR_THROW_IF(!shared_node->isFinalized(), std::invalid_argument)
+./graph/network_graph.cpp:    //   << shared_node_str << " dependent node name: " << lnode->getName();
+./graph/network_graph.cpp:    // auto num_weight = shared_node->getNumWeights();
+./graph/network_graph.cpp:    //   shared_weight_names.emplace_back(shared_node->getWeightName(i));
+./graph/network_graph.cpp:    // auto &rc = node->getRunContext();
+./graph/network_graph.cpp:  lnode->configureRunContext(
+./graph/network_graph.cpp:    tensor_manager->requestWeights(gnode, init_context.getWeightsSpec(),
+./graph/network_graph.cpp:                                   lnode->getTrainable(), shared_weight_names),
+./graph/network_graph.cpp:    tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
+./graph/network_graph.cpp:                                   lnode->getTrainable(), shared_tensor_names));
+./graph/network_graph.cpp:  auto init_context = lnode->getInitContext();
+./graph/network_graph.cpp:    const auto name = lnode->getName() + ":" + spec.variable_spec.name;
+./graph/network_graph.cpp:    auto orders = tensor_manager->getTensorExecutionOrders(name, false);
+./graph/network_graph.cpp:        tensor_manager->getTensorExecutionOrders(name + ":grad", false);
+./graph/network_graph.cpp:    auto orders = tensor_manager->getTensorExecutionOrders(name, true);
+./graph/network_graph.cpp:        tensor_manager->getTensorExecutionOrders(name + ":grad", false);
+./graph/network_graph.cpp:    auto orders = tensor_manager->getTensorExecutionOrders(name, false);
+./graph/network_graph.cpp:        tensor_manager->getTensorExecutionOrders(name + ":grad", false);
+./graph/network_graph.cpp:  auto is_input_node = [](const LayerNode *node) -> bool {
+./graph/network_graph.cpp:    return node->getInputConnections().empty();
+./graph/network_graph.cpp:    lnode->setTensorType(getModelTensorType());
+./graph/network_graph.cpp:    if (profile_keys.find(lnode->getType()) == profile_keys.end()) {
+./graph/network_graph.cpp:      PROFILE_TIME_REGISTER_EVENT(event_key, lnode->getType());
+./graph/network_graph.cpp:      profile_keys[lnode->getType()] = event_key;
+./graph/network_graph.cpp:      if (input_map.find(lnode->getName()) == input_map.end())
+./graph/network_graph.cpp:      inputs = input_map.at(lnode->getName());
+./graph/network_graph.cpp:    if (idx == graph.size() - 1)
+./graph/network_graph.cpp:    for (auto i = 0u, num_node = lnode->getNumOutputConnections(); i < num_node;
+./graph/network_graph.cpp:      auto conn = lnode->getOutputConnection(i);
+./graph/network_graph.cpp:                lnode->getName().c_str(), i);
+./graph/network_graph.cpp:      auto sink_node = getLayerNode(conn->getName());
+./graph/network_graph.cpp:        input_map.try_emplace({sink_node->getName(), {}});
+./graph/network_graph.cpp:      NNTR_THROW_IF(sink_node->getInputConnectionName(conn->getIndex()) !=
+./graph/network_graph.cpp:                      lnode->getName(),
+./graph/network_graph.cpp:        << "node pair does not match between " << lnode->getName() << ' '
+./graph/network_graph.cpp:        << sink_node->getName();
+./graph/network_graph.cpp:      auto &sink_tensors = it->second;
+./graph/network_graph.cpp:      sink_tensors.resize(sink_node->getNumInputConnections());
+./graph/network_graph.cpp:      sink_tensors[conn->getIndex()] = outputs[i];
+./graph/network_graph.cpp:    auto &rc = lnode->getRunContext();
+./graph/network_graph.cpp:    auto first_grad_access = std::get<1>(lnode->getExecutionOrder());
+./graph/network_graph.cpp:    auto last_grad_access = std::get<3>(lnode->getExecutionOrder());
+./graph/network_graph.cpp:        if (tensor_manager->isFirstAccess(
+./graph/network_graph.cpp:              std::get<0>(lnode->getExecutionOrder()), true)) {
+./graph/network_graph.cpp:        if (tensor_manager->isLastAccess(rc.getWeight(i).getName(),
+./graph/network_graph.cpp:        if (tensor_manager->isFirstAccess(rc.getWeightGrad(i).getName(),
+./graph/network_graph.cpp:        if (tensor_manager->isLastAccess(rc.getWeightGrad(i).getName(),
+./graph/network_graph.cpp:             tensor_manager->isSecondLastAccess(rc.getWeightGrad(i).getName(),
+./graph/network_graph.cpp:    auto num_input = node->getNumInputs();
+./graph/network_graph.cpp:    input_list.push_back(node->getInput(0).getName());
+./graph/network_graph.cpp:    input_dims.push_back(node->getInputDimensions()[0]);
+./graph/network_graph.cpp:  auto is_label_node = [](LayerNode *node) { return node->requireLabel(); };
+./graph/network_graph.cpp:    /// @todo change this as lnode->getNumLabels of sorts
+./graph/network_graph.cpp:    auto num_label = node->getNumOutputs();
+./graph/network_graph.cpp:    NNTR_THROW_IF(!node->getOutputConnections().empty(), std::invalid_argument)
+./graph/network_graph.cpp:    output_list.push_back(node->getOutput(0).getName());
+./graph/network_graph.cpp:    label_list.push_back(node->getOutputGrad(0).getName());
+./graph/network_graph.cpp:    label_dims.push_back(node->getOutputDimensions()[0]);
+./graph/network_graph.cpp:  clip_weights = tensor_manager->getWeights([](const Weight *w) {
+./graph/network_graph.cpp:    return w->hasGradient() && w->isGradientLastAccess() &&
+./graph/network_graph.cpp:           w->isGradientClipByGlobalNorm();
+./graph/network_graph.cpp:      tensor_manager->fillPlaceholder(names[idx], Tensor());
+./graph/network_graph.cpp:      tensor_manager->fillPlaceholder(names[idx], data[0]);
+./graph/network_graph.cpp:      tensor_manager->fillPlaceholder(names[idx], data[idx]);
+./graph/network_graph.cpp:    output_tensors.push_back(*tensor_manager->getTensor(name));
+./graph/network_graph.cpp:void NetworkGraph::flushCache() { tensor_manager->flushCache(); }
+./graph/network_graph.cpp:  tensor_manager->flushCacheExcept(order);
+./graph/network_graph.cpp:  for (auto const &w : tensor_manager->getWeights()) {
+./graph/network_graph.cpp:    if (w->isGradientLastAccess() && w->hasGradient()) {
+./graph/network_graph.cpp:      const TensorDim &dim = w->getDim();
+./graph/network_graph.cpp:      w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
+./graph/network_graph.cpp:        dims, w->getName(), TensorLifespan::MAX_LIFESPAN,
+./graph/network_graph.cpp:        w->isGradientClipByGlobalNorm(), Tensor::Initializer::ZEROS));
+./graph/graph_node.h:// SPDX-License-Identifier: Apache-2.0
+./graph/graph_node.h:  value_type operator->() const {
+./graph/graph_node.h:   * @brief override for -- operator
+./graph/graph_node.h:  GraphNodeIterator &operator--() {
+./graph/graph_node.h:    p -= 1;
+./graph/graph_node.h:   * @brief override for operator--
+./graph/graph_node.h:  GraphNodeIterator operator--(int) {
+./graph/graph_node.h:    p -= 1;
+./graph/graph_node.h:  GraphNodeIterator operator-(const difference_type offset) const {
+./graph/graph_node.h:    return GraphNodeIterator(p - offset);
+./graph/graph_node.h:  difference_type operator-(const GraphNodeIterator &other) const {
+./graph/graph_node.h:    return p - other.p;
+./graph/graph_node.h:  GraphNodeIterator &operator-=(const difference_type offset) {
+./graph/graph_node.h:    p -= offset;
+./graph/graph_node.h:    auto temp = std::reverse_iterator<T_iterator>::current - 1;
+./graph/graph_node.h:  typename T_iterator::value_type operator->() const {
+./graph/graph_node.h:    auto temp = std::reverse_iterator<T_iterator>::current - 1;
+./graph/connection.h:// SPDX-License-Identifier: Apache-2.0
+./graph/graph_core.h:// SPDX-License-Identifier: Apache-2.0
+./graph/graph_core.h:      //        node_list[i]->copy(from.node_list[i]);
+./graph/graph_core.h:   * name pre-assigned to the node can be changed if force_rename is enabled.
+./graph/graph_core.cpp:// SPDX-License-Identifier: Apache-2.0
+./graph/graph_core.cpp:  node_map[node->getName()] = node_list.size() - 1;
+./graph/graph_core.cpp:    for (auto const &in_conn : node->getInputConnections()) {
+./graph/graph_core.cpp:    auto index = getNodeIdx((*i)->getName());
+./graph/graph_core.cpp:  // Quite likely this is not needed - verify this
+./graph/graph_core.cpp:  auto to_lower = [](const std::string &str) -> std::string {
+./graph/graph_core.cpp:  /** If just prefix with node name makes it unique - directly set the name */
+./graph/graph_core.cpp:  if (node_map.find(from->getName()) == node_map.end())
+./graph/graph_core.cpp:  if (node_map.find(to->getName()) != node_map.end())
+./graph/graph_core.cpp:  unsigned int from_idx = getNodeIdx(from->getName());
+./graph/graph_core.cpp:  node_map.erase(from->getName());
+./graph/graph_core.cpp:  node_map[to->getName()] = from_idx;
+./graph/graph_core.cpp:    if (iter->getInputConnections().size() == 0) {
+./graph/graph_core.cpp:    if (iter->getOutputConnections().size() == 0) {
+./layers/pooling2d_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/embedding.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/embedding.cpp:      // If in_data[i] - 1 < 0, then it skips.
+./layers/embedding.cpp:      // If in_data[i] - 1 < 0, then it skips.
+./layers/input_layer.h: *   http://www.apache.org/licenses/LICENSE-2.0
+./layers/concat_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/concat_layer.cpp:           "along non-concat dimension";
+./layers/zoneout_lstmcell.h:// SPDX-License-Identifier: Apache-2.0
+./layers/preprocess_l2norm_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/split_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/flatten_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/entropy_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/bn_layer.h: *   http://www.apache.org/licenses/LICENSE-2.0
+./layers/fc_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/fc_layer.h:    fc_props; /**< fc layer properties : unit - number of output neurons */
+./layers/concat_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/attention_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/layer_node.h:// SPDX-License-Identifier: Apache-2.0
+./layers/layer_node.h:  RESTRICTING,    /**< layer is in-place and does place restriction on layers
+./layers/layer_node.h:                    ahead of it to be in-place */
+./layers/layer_node.h:  NON_RESTRICTING /**< layer is in-place and does NOT place restriction on the
+./layers/layer_node.h:                    layers ahead of it to be in-place */
+./layers/layer_node.h:   * @details   This function accepts vector of properties in the format -
+./layers/layer_node.h:   * @brief   If the current layer can support in-place
+./layers/layer_node.h:   * @brief   Notify that this layer will execute in-place
+./layers/layer_node.h:      throw std::runtime_error("Error setting layer to work in-place");
+./layers/layer_node.h:   * @brief   Get if the layer is going to execute in-place
+./layers/layer_node.h:  bool supportBackwarding() const { return getLayer()->supportBackwarding(); }
+./layers/layer_node.h:    return run_context->getNumInputs();
+./layers/layer_node.h:    return run_context->getNumOutputs();
+./layers/layer_node.h:    return run_context->getNumWeights();
+./layers/layer_node.h:    if (run_context->weightHasGradient(idx)) {
+./layers/layer_node.h:      return Weight(run_context->getWeight(idx),
+./layers/layer_node.h:                    run_context->getWeightGrad(idx),
+./layers/layer_node.h:                    run_context->getWeightName(idx));
+./layers/layer_node.h:      return Weight(run_context->getWeight(idx), Tensor(),
+./layers/layer_node.h:                    run_context->getWeightName(idx));
+./layers/layer_node.h:    return run_context->getWeightObject(idx);
+./layers/layer_node.h:    return run_context->getWeight(idx);
+./layers/layer_node.h:    return run_context->getWeightGrad(idx);
+./layers/layer_node.h:    return run_context->getWeightName(idx);
+./layers/layer_node.h:    return run_context->getInput(idx);
+./layers/layer_node.h:    return run_context->getInputGrad(idx);
+./layers/layer_node.h:    return run_context->getOutput(idx);
+./layers/layer_node.h:    return run_context->getOutputGrad(idx);
+./layers/layer_node.h:    return run_context->getOutputGradUnsafe(idx);
+./layers/layer_node.h:    inplace; /**< store if the current layer is going to operate in-place */
+./layers/layer_impl.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/reshape_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/reshape_layer.cpp: * @todo Update flatten to work in-place properly.
+./layers/reshape_layer.cpp:  if ((int)out_dim.getDataLen() == -1) {
+./layers/grucell.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/grucell.cpp: * h_prev --------d1------->[*]-------d0----->[+]---d0--> h
+./layers/grucell.cpp: *          |  |             +-----[1-]------>[*]
+./layers/grucell.cpp: *          | [*]<---+ d15   |d5               | d6
+./layers/grucell.cpp: *          +- |--+------|---+    |          |    |
+./layers/grucell.cpp: *             +---------|--------|----------+    |
+./layers/grucell.cpp: *   xs------------------+--------+---------------+
+./layers/grucell.cpp:  temp = update_gate.multiply(-1.0).add(1.0);
+./layers/grucell.cpp:                              -1.0f); // d_update_gate = d5
+./layers/grucell.cpp:  update_gate.multiply(-1.0, d_memory_cell);
+./layers/grucell.cpp:  epsilon(1e-3) {
+./layers/grucell.cpp:  // - weight_ih ( input to hidden )
+./layers/grucell.cpp:  // weight_ih_dim : [ 1, 1, feature_size, NUMGATE * unit ] -> z, r, g
+./layers/grucell.cpp:  // - weight_hh ( hidden to hidden )
+./layers/grucell.cpp:  // weight_hh_dim : [ 1, 1, unit, NUM_GATE * unit ] -> z, r, g
+./layers/grucell.cpp:      // - bias_h ( input bias, hidden bias are integrate to 1 bias )
+./layers/grucell.cpp:      // bias_h_dim : [ 1, 1, 1, NUM_GATE * unit ] -> z, r, g
+./layers/grucell.cpp:      // - bias_ih ( input bias )
+./layers/grucell.cpp:      // bias_ih_dim : [ 1, 1, 1, NUM_GATE * unit ] -> z, r, g
+./layers/grucell.cpp:      // - bias_hh ( hidden bias )
+./layers/grucell.cpp:      // bias_hh_dim : [ 1, 1, 1, NUM_GATE * unit ] -> z, r, g
+./layers/permute_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/mol_attention_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/mol_attention_layer.cpp:    << "MoL Attention layer needs 3-4 inputs.";
+./layers/mol_attention_layer.cpp:  u_base.add_i(-0.5);
+./layers/mol_attention_layer.cpp:  Tensor beta_eps = beta.add(1e-8f);
+./layers/mol_attention_layer.cpp:  Tensor dprob_right = dprob.multiply(-1);
+./layers/mol_attention_layer.cpp:  Tensor beta_eps = beta.add(1e-8f);
+./layers/mol_attention_layer.cpp:  Tensor dm_neg = du_neg_m.multiply(-1).sum(2);
+./layers/mol_attention_layer.cpp:  Tensor dbeta_eps_neg = du_neg_m.multiply(u_neg_div).multiply(-1).sum(2);
+./layers/mol_attention_layer.cpp:  Tensor dm_pos = du_pos_m.multiply(-1).sum(2);
+./layers/mol_attention_layer.cpp:  Tensor dbeta_eps_pos = du_pos_m.multiply(u_pos_div).multiply(-1).sum(2);
+./layers/identity_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/cross_entropy_sigmoid_loss_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/cross_entropy_sigmoid_loss_layer.cpp:    // log(1 + exp(-abs(y))) + max(y, 0)
+./layers/loss/cross_entropy_sigmoid_loss_layer.cpp:                        .multiply(-1.0)
+./layers/loss/cross_entropy_sigmoid_loss_layer.cpp:    // loss = log(1 + exp(-abs(y))) + max(y, 0) - (y * y2)
+./layers/loss/cross_entropy_softmax_loss_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/constant_derivative_loss_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/kld_loss_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/kld_loss_layer.h: * @brief  KLD (Kullback-Leibler Divergence) loss implementation
+./layers/loss/kld_loss_layer.h: * @class   KLD (Kullback-Leibler Divergence) Loss layer
+./layers/loss/cross_entropy_softmax_loss_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/cross_entropy_softmax_loss_layer.cpp:    l = y2.multiply(hidden_.apply(logFloat)).sum_by_batch().multiply(-1);
+./layers/loss/cross_entropy_softmax_loss_layer.cpp:  /// @note y and ret_derivative can be same here, so this has to be out-place
+./layers/loss/cross_entropy_softmax_loss_layer.cpp:  // working in-place
+./layers/loss/constant_derivative_loss_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/cross_entropy_loss_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/loss_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/mse_loss_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/cross_entropy_sigmoid_loss_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/loss_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/mse_loss_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/mse_loss_layer.cpp:  // hidden_ <- y2 - y;
+./layers/loss/kld_loss_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/loss/kld_loss_layer.cpp: * @brief  KLD (Kullback-Leibler Divergence) loss implementation
+./layers/dropout.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/dropout.cpp:  // Assume it is in-place calculation. It means input and output share mem
+./layers/dropout.cpp:    /** @todo make this in-place */
+./layers/dropout.cpp:  // Assume it is in-place calculation
+./layers/dropout.cpp:    /** @todo make this in-place */
+./layers/embedding.h:// SPDX-License-Identifier: Apache-2.0
+./layers/rnn.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/rnn.cpp:// - weight_ih ( input to hidden )
+./layers/rnn.cpp:// - weight_hh ( hidden to hidden )
+./layers/rnn.cpp:// - bias_h ( input bias, hidden bias )
+./layers/rnn.cpp:// - bias_ih ( input bias )
+./layers/rnn.cpp:// - bias_hh ( hidden bias )
+./layers/rnn.cpp:  epsilon(1e-3) {
+./layers/rnn.cpp:          hidden_state_slice.getSharedDataTensor({unit}, (timestep - 1) * unit);
+./layers/rnn.cpp:      // In-place calculation for activation
+./layers/rnn.cpp:        batch * unit * max_timestep + (max_timestep - 1) * unit);
+./layers/rnn.cpp:        batch * unit * max_timestep + (max_timestep - 1) * unit);
+./layers/rnn.cpp:    for (unsigned int timestep = max_timestep; timestep-- > 0;) {
+./layers/rnn.cpp:          (timestep - 1) * hidden_state_t.width());
+./layers/rnn.cpp:                                      (timestep - 1) * deriv_t.width());
+./layers/conv1d_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/conv1d_layer.cpp:    conv2d_layer->setProperty({prop});
+./layers/conv1d_layer.cpp:  conv2d_layer->finalize(context);
+./layers/conv1d_layer.cpp:  conv2d_layer->forwarding(context, training);
+./layers/conv1d_layer.cpp:  conv2d_layer->calcDerivative(context);
+./layers/conv1d_layer.cpp:  conv2d_layer->calcGradient(context);
+./layers/lstmcell_core.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/lstmcell_core.cpp:  epsilon(1e-3) {}
+./layers/preprocess_translate_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/preprocess_translate_layer.cpp:  epsilon(1e-5),
+./layers/preprocess_translate_layer.cpp:    translate_dist = std::uniform_real_distribution<float>(-random_translate,
+./layers/permute_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/permute_layer.cpp:  ss << arr[0].get() - 1 << ':' << arr[1].get() - 1 << ':' << arr[2].get() - 1;
+./layers/permute_layer.cpp:      check_transpose.set(direction[i] - 1, true);
+./layers/permute_layer.cpp:      this->reverse_direction[direction[i] - 1].set(i + 1);
+./layers/preprocess_flip_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/preprocess_l2norm_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/positional_encoding_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/grucell.h:// SPDX-License-Identifier: Apache-2.0
+./layers/lstm.h:// SPDX-License-Identifier: Apache-2.0
+./layers/lstm.h: * @brief  This is Long Short-Term Memory Layer Class of Neural Network
+./layers/bn_layer.cpp: *   http://www.apache.org/licenses/LICENSE-2.0
+./layers/bn_layer.cpp:   * caches the deviation -> input - avg(input)
+./layers/bn_layer.cpp:   * more in-place calculation) can save memory during memory optimization.
+./layers/bn_layer.cpp:   * norm to execute in-place. Running in-place leads to same memory footprint
+./layers/bn_layer.cpp:    mu.add_i(t_reduced, 1 - momentum);
+./layers/bn_layer.cpp:    var.add_i(cvar, 1 - momentum);
+./layers/bn_layer.cpp:    cvar.pow(-0.5f, invstd);
+./layers/bn_layer.cpp:    invstd.pow_i(-0.5f);
+./layers/bn_layer.cpp:     * This implementation depends on the pre-calculated dbeta calculated.
+./layers/addition_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/addition_layer.cpp:  /** @todo check possibility for in-place of addition layer */
+./layers/rnncell.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/rnncell.cpp:// - weight_ih ( weights of input to hidden )
+./layers/rnncell.cpp:// - weight_hh ( weights of hidden to hidden )
+./layers/rnncell.cpp:// - bias_h ( input bias, hidden_bias )
+./layers/rnncell.cpp:// - bias_ih ( input bias )
+./layers/rnncell.cpp:// - bias_hh ( hidden bias )
+./layers/rnncell.cpp:  epsilon(1e-3) {
+./layers/activation_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/tflite_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/tflite_layer.cpp:  std::string ext(v.end() - ending_len, v.end());
+./layers/tflite_layer.cpp:  std::for_each(ext.end() - ending_len, ext.end(),
+./layers/tflite_layer.cpp:    if (is_output && interpreter->tensor(tensor_idx)->type != kTfLiteFloat32)
+./layers/tflite_layer.cpp:    unsigned int num_dims = interpreter->tensor(tensor_idx)->dims->size;
+./layers/tflite_layer.cpp:        ml::train::TensorDim::MAXDIM - dim_idx - 1,
+./layers/tflite_layer.cpp:        interpreter->tensor(tensor_idx)->dims->data[num_dims - dim_idx - 1]);
+./layers/tflite_layer.cpp:  NNTR_THROW_IF(interpreter->AllocateTensors() != kTfLiteOk, std::runtime_error)
+./layers/tflite_layer.cpp:  setDimensions(interpreter->inputs(), dims, false);
+./layers/tflite_layer.cpp:      << "Input dimensions mismatch -> " << idx << ":" << dims[idx] << " "
+./layers/tflite_layer.cpp:  setDimensions(interpreter->outputs(), output_dims, true);
+./layers/tflite_layer.cpp:  auto in_indices = interpreter->inputs();
+./layers/tflite_layer.cpp:    interpreter->tensor(in_indices[idx])->data.raw =
+./layers/tflite_layer.cpp:  auto out_indices = interpreter->outputs();
+./layers/tflite_layer.cpp:    interpreter->tensor(out_indices[idx])->data.raw =
+./layers/tflite_layer.cpp:  int status = interpreter->Invoke();
+./layers/tflite_layer.cpp:  setDimensions(interpreter->outputs(), out_tf_dim, true);
+./layers/identity_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/plugged_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/plugged_layer.h:    layerImpl(pluggable->createfunc()),
+./layers/plugged_layer.h:    destroy_func(pluggable->destroyfunc) {
+./layers/plugged_layer.h:  const std::string getType() const override { return layerImpl->getType(); }
+./layers/plugged_layer.h:    layerImpl->finalize(context);
+./layers/plugged_layer.h:    layerImpl->forwarding(context, training);
+./layers/plugged_layer.h:    layerImpl->calcDerivative(context);
+./layers/plugged_layer.h:    layerImpl->calcGradient(context);
+./layers/plugged_layer.h:    layerImpl->setProperty(values);
+./layers/plugged_layer.h:    layerImpl->exportTo(exporter, method);
+./layers/plugged_layer.h:    layerImpl->setBatch(context, batch);
+./layers/plugged_layer.h:  bool supportInPlace() const override { return layerImpl->supportInPlace(); }
+./layers/plugged_layer.h:  bool requireLabel() const override { return layerImpl->requireLabel(); }
+./layers/plugged_layer.h:    return layerImpl->supportBackwarding();
+./layers/layer_node.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/layer_node.cpp:  lnode->setProperty(properties);
+./layers/layer_node.cpp:  if (layer && layer->getType() == TimeDistLayer::type) {
+./layers/layer_node.cpp:  layer->setProperty(left_properties);
+./layers/layer_node.cpp:      layer->setProperty({"activation=" + to_string(act_prop)});
+./layers/layer_node.cpp:  //   << con->toString();
+./layers/layer_node.cpp:      out << con->toString() << ' ';
+./layers/layer_node.cpp:    names.push_back(conn->getName());
+./layers/layer_node.cpp:const std::string LayerNode::getType() const { return getLayer()->getType(); }
+./layers/layer_node.cpp:     * non-trainable layer.
+./layers/layer_node.cpp:           (run_context->getNumWeights() > 0);
+./layers/layer_node.cpp:    return static_cast<TimeDistLayer *>(layer.get())->getDistLayer();
+./layers/layer_node.cpp:    return static_cast<TimeDistLayer *>(layer.get())->getDistLayer();
+./layers/layer_node.cpp:  auto sz = run_context->getNumInputs();
+./layers/layer_node.cpp:    dims.push_back(run_context->getInput(i).getDim());
+./layers/layer_node.cpp:  auto sz = run_context->getNumOutputs();
+./layers/layer_node.cpp:    dims.push_back(run_context->getOutput(i).getDim());
+./layers/layer_node.cpp:  layer->exportTo(exporter, method);
+./layers/layer_node.cpp:    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+./layers/layer_node.cpp:      if (run_context->isGradientLastAccess(i) && getTrainable()) {
+./layers/layer_node.cpp:        for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+./layers/layer_node.cpp:          run_context->getWeightOptVar(i, j).read(file);
+./layers/layer_node.cpp:    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+./layers/layer_node.cpp:      if (run_context->isGradientLastAccess(i)) {
+./layers/layer_node.cpp:        run_context->getWeight(i).read(file);
+./layers/layer_node.cpp:    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+./layers/layer_node.cpp:      if (run_context->isGradientLastAccess(i) && getTrainable()) {
+./layers/layer_node.cpp:        if (run_context->weightHasGradient(i)) {
+./layers/layer_node.cpp:          for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
+./layers/layer_node.cpp:            run_context->getWeightOptVar(i, j).save(file);
+./layers/layer_node.cpp:    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+./layers/layer_node.cpp:      if (run_context->isGradientLastAccess(i)) {
+./layers/layer_node.cpp:        run_context->getWeight(i).save(file);
+./layers/layer_node.cpp:  for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+./layers/layer_node.cpp:    if (run_context->isGradientLastAccess(i) && getTrainable()) {
+./layers/layer_node.cpp:      for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+./layers/layer_node.cpp:        run_context->getWeightOptVar(i, j).initialize();
+./layers/layer_node.cpp:    dlayer->setDistLayer(std::move(layer));
+./layers/layer_node.cpp:  layer->finalize(context);
+./layers/layer_node.cpp:  loss->set(run_context->getRegularizationLoss());
+./layers/layer_node.cpp:  layer->forwarding(*run_context, training);
+./layers/layer_node.cpp:  if (!run_context->validate(getNumInputConnections() == 0, !requireLabel()))
+./layers/layer_node.cpp:    loss->set(*loss + run_context->getLoss());
+./layers/layer_node.cpp:  layer->calcDerivative(*run_context);
+./layers/layer_node.cpp:  if (!run_context->validate(getNumInputConnections() == 0, !requireLabel()))
+./layers/layer_node.cpp:    layer->calcGradient(*run_context);
+./layers/layer_node.cpp:  if (!run_context->validate(getNumInputConnections() == 0, !requireLabel()))
+./layers/layer_node.cpp:  getLayer()->setBatch(*run_context, batch);
+./layers/layer_node.cpp: * @brief   If the current layer can support in-place
+./layers/layer_node.cpp:  return layer->supportInPlace();
+./layers/layer_node.cpp:bool LayerNode::requireLabel() const { return getLayer()->requireLabel(); }
+./layers/layer_node.cpp:  // clang-format off
+./layers/layer_node.cpp:                                 e.g) layer activation type for non-activation layer. */
+./layers/layer_node.cpp:  // clang-format on
+./layers/layer_node.cpp:    auto &name = output_layer->getName();
+./layers/layer_node.cpp:    auto &idx = output_layer->getIndex();
+./layers/layer_node.cpp:  key_val_props.reserve(props->size());
+./layers/layer_node.cpp:    out << "input " << run_context->getInput(idx).getDim();
+./layers/layer_node.cpp:    out << "weight " << run_context->getWeight(idx).getDim();
+./layers/layer_node.cpp:    out << "output " << run_context->getOutput(idx).getDim();
+./layers/layer_node.cpp:    getLayer()->exportTo(e, ml::train::ExportMethods::METHOD_STRINGVECTOR);
+./layers/layer_node.cpp:      for (unsigned int i = 0; i < prop_meta->size(); ++i) {
+./layers/layer_node.cpp:        out << run_context->getWeight(idx);
+./layers/activation_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/nnstreamer_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/nnstreamer_layer.h:#include <nnstreamer-single.h>
+./layers/reduce_mean_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/layer_impl.h:// SPDX-License-Identifier: Apache-2.0
+./layers/meson.build:if get_option('enable-nnstreamer-backbone') and get_option('platform') != 'android'
+./layers/meson.build:if get_option('enable-tflite-backbone')
+./layers/meson.build:    error('Tensorflow-Lite dependency not found')
+./layers/split_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/acti_func.h:// SPDX-License-Identifier: Apache-2.0
+./layers/acti_func.h:   * @brief   Notify that this layer will execute in-place
+./layers/acti_func.h:   * @param val True if execute in-place, else false
+./layers/lstmcell.h:// SPDX-License-Identifier: Apache-2.0
+./layers/input_layer.cpp: *   http://www.apache.org/licenses/LICENSE-2.0
+./layers/common_properties.h:// SPDX-License-Identifier: Apache-2.0
+./layers/common_properties.h:   * @retval true if it contains alphanumeric and/or '-', '_', '/'
+./layers/common_properties.h:   * @retval false if it is empty or contains non-valid character
+./layers/lstm.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/lstm.cpp: * @brief  This is Long Short-Term Memory Layer Class of Neural Network
+./layers/lstm.cpp:        {feature_size}, (reverse ? max_timestep - 1 - t : t) * feature_size);
+./layers/lstm.cpp:          {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:        {unit}, (reverse ? max_timestep - 1 - t : t) * unit);
+./layers/lstm.cpp:          {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:        {unit}, (reverse ? max_timestep - 1 - t : t) * unit);
+./layers/lstm.cpp:        (reverse ? max_timestep - 1 - t : t) * NUM_GATE * unit);
+./layers/lstm.cpp:                    (return_sequences ? 0 : max_timestep - 1) * unit +
+./layers/lstm.cpp:        for (int t = max_timestep - 1; t > -1; t--) {
+./layers/lstm.cpp:            (reverse ? max_timestep - 1 - t : t) * feature_size);
+./layers/lstm.cpp:              {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:              {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:            {unit}, (reverse ? max_timestep - 1 - t : t) * unit);
+./layers/lstm.cpp:              {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:              {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:            {unit}, (reverse ? max_timestep - 1 - t : t) * unit);
+./layers/lstm.cpp:            {unit}, (reverse ? max_timestep - 1 - t : t) * unit);
+./layers/lstm.cpp:            (reverse ? max_timestep - 1 - t : t) * NUM_GATE * unit);
+./layers/lstm.cpp:            (reverse ? max_timestep - 1 - t : t) * NUM_GATE * unit);
+./layers/lstm.cpp:      for (int t = max_timestep - 1; t > -1; t--) {
+./layers/lstm.cpp:          {feature_size}, (reverse ? max_timestep - 1 - t : t) * feature_size);
+./layers/lstm.cpp:            {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:            {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:          {unit}, (reverse ? max_timestep - 1 - t : t) * unit);
+./layers/lstm.cpp:            {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:            {unit}, (reverse ? (max_timestep - t) : (t - 1)) * unit);
+./layers/lstm.cpp:          {unit}, (reverse ? max_timestep - 1 - t : t) * unit);
+./layers/lstm.cpp:          {unit}, (reverse ? max_timestep - 1 - t : t) * unit);
+./layers/lstm.cpp:          (reverse ? max_timestep - 1 - t : t) * NUM_GATE * unit);
+./layers/lstm.cpp:          (reverse ? max_timestep - 1 - t : t) * NUM_GATE * unit);
+./layers/lstm.cpp:  // -> i, f, g, o
+./layers/lstm.cpp:  // weight_hh ( hidden to hidden ) : [ 1, 1, unit, NUM_GATE * unit ] -> i,
+./layers/lstm.cpp:      // 1, 1, NUM_GATE * unit ] -> i, f, g, o
+./layers/lstm.cpp:      // bias_ih ( input bias ) : [ 1, 1, 1, NUM_GATE * unit ] -> i, f, g, o
+./layers/lstm.cpp:      // bias_hh ( hidden bias ) : [ 1, 1, 1, NUM_GATE * unit ] -> i, f, g, o
+./layers/lstm.cpp:    // NUM_GATE * unit ] -> i, f, g, o
+./layers/lstm.cpp:    // -> i, f, g, o
+./layers/lstm.cpp:        // ) : [ 1, 1, 1, NUM_GATE * unit ] -> i, f, g, o
+./layers/lstm.cpp:        // reverse_bias_ih ( input bias ) : [ 1, 1, 1, NUM_GATE * unit ] ->
+./layers/lstm.cpp:        // reverse_bias_hh ( hidden bias ) : [ 1, 1, 1, NUM_GATE * unit ] ->
+./layers/lstm.cpp:          (return_sequences ? 0 : (max_timestep - 1) * unit) + timestep * unit);
+./layers/lstm.cpp:            (return_sequences ? 0 : (max_timestep - 1) * unit) +
+./layers/layer_devel.h: *   http://www.apache.org/licenses/LICENSE-2.0
+./layers/layer_devel.h:   *            31. recurrent_activation :  string (type) - lstm
+./layers/layer_devel.h:   *            34. return_sequences :  bool (type) - lstm
+./layers/layer_devel.h:   *            35. hidden_state_activation :  string (type) - lstm
+./layers/layer_devel.h:   * @brief   If the current layer can support in-place
+./layers/layer_devel.h:  ptr->setProperty(props);
+./layers/layer_normalization_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/addition_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/lstmcell.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/lstmcell.cpp:  // - weight_ih ( input to hidden )
+./layers/lstmcell.cpp:  //  : [ 1, 1, feature_size, NUM_GATE x unit ] -> i, f, g, o
+./layers/lstmcell.cpp:  // - weight_hh ( hidden to hidden )
+./layers/lstmcell.cpp:  //  : [ 1, 1, unit, NUM_GATE x unit ] -> i, f, g, o
+./layers/lstmcell.cpp:      // - bias_h ( input bias, hidden bias are integrate to 1 bias )
+./layers/lstmcell.cpp:      //  : [ 1, 1, 1, NUM_GATE x unit ] -> i, f, g, o
+./layers/lstmcell.cpp:      // - bias_ih ( input bias )
+./layers/lstmcell.cpp:      //  : [ 1, 1, 1, NUM_GATE x unit ] -> i, f, g, o
+./layers/lstmcell.cpp:      // - bias_hh ( hidden bias )
+./layers/lstmcell.cpp:      //  : [ 1, 1, 1, NUM_GATE x unit ] -> i, f, g, o
+./layers/zoneout_lstmcell.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/zoneout_lstmcell.cpp:  // - weight_ih ( input to hidden )
+./layers/zoneout_lstmcell.cpp:  //  : [ 1, 1, feature_size, NUM_GATE x unit ] ->
+./layers/zoneout_lstmcell.cpp:  // - weight_hh ( hidden to hidden )
+./layers/zoneout_lstmcell.cpp:  //  : [ 1, 1, unit, NUM_GATE x unit ] -> i, f, g,
+./layers/zoneout_lstmcell.cpp:      // - bias_h ( input bias, hidden bias are
+./layers/zoneout_lstmcell.cpp:      //  : [ 1, 1, 1, NUM_GATE x unit ] -> i, f, g,
+./layers/zoneout_lstmcell.cpp:      // - bias_ih ( input bias )
+./layers/zoneout_lstmcell.cpp:      //  : [ 1, 1, 1, NUM_GATE x unit ] -> i, f, g,
+./layers/zoneout_lstmcell.cpp:      // - bias_hh ( hidden bias )
+./layers/zoneout_lstmcell.cpp:      //  : [ 1, 1, 1, NUM_GATE x unit ] -> i, f, g,
+./layers/zoneout_lstmcell.cpp:      hidden_state_zoneout_mask.multiply(-1.0f, prev_hidden_state_zoneout_mask);
+./layers/zoneout_lstmcell.cpp:      cell_state_zoneout_mask.multiply(-1.0f, prev_cell_state_zoneout_mask);
+./layers/multi_head_attention_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/conv2d_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/fc_layer.cpp: *   http://www.apache.org/licenses/LICENSE-2.0
+./layers/flatten_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/flatten_layer.cpp: * @todo Update flatten to work in-place properly.
+./layers/layer_context.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/layer_context.cpp:    spec.gradient_spec->name += std::to_string(idx) + Var_Grad::grad_suffix;
+./layers/layer_context.cpp:  spec.gradient_spec->ls = grad_ls;
+./layers/layer_context.cpp:  return weights[idx]->getVariableRef();
+./layers/layer_context.cpp:  if (!weights[idx]->hasGradient())
+./layers/layer_context.cpp:      "Requesting gradient for a non-trainable weight.");
+./layers/layer_context.cpp:  return weights[idx]->getGradientRef();
+./layers/layer_context.cpp:  return weights[idx]->getOptimizerVariableRef(jdx);
+./layers/layer_context.cpp:  return weights[idx]->getNumOptVariable();
+./layers/layer_context.cpp:  return weights[idx]->getRegularizationLoss();
+./layers/layer_context.cpp:  return weights[idx]->getName();
+./layers/layer_context.cpp:  return weights[idx]->hasGradient();
+./layers/layer_context.cpp:  return outputs[idx]->getVariableRef();
+./layers/layer_context.cpp:  return outputs[idx]->getVariableRef();
+./layers/layer_context.cpp: * @return Tensor Read-only output grad tensor
+./layers/layer_context.cpp:  if (!outputs[idx]->hasGradient()) {
+./layers/layer_context.cpp:    return Tensor(outputs[idx]->getDim(), true, Tensor::Initializer::ZEROS);
+./layers/layer_context.cpp:  return const_cast<RunLayerContext *>(this)->getOutputGradUnsafe(idx);
+./layers/layer_context.cpp:  return outputs[idx]->hasGradient();
+./layers/layer_context.cpp:  return outputs[idx]->getGradientRef();
+./layers/layer_context.cpp:  return inputs[idx]->getVariableRef();
+./layers/layer_context.cpp:  return inputs[idx]->getVariableRef();
+./layers/layer_context.cpp:  if (!inputs[idx]->hasGradient()) {
+./layers/layer_context.cpp:      "Requesting gradient for a non-trainable tensor.");
+./layers/layer_context.cpp:  return inputs[idx]->getGradientRef();
+./layers/layer_context.cpp:  return inputs[idx]->hasGradient();
+./layers/layer_context.cpp:  return tensors[idx]->getVariableRef();
+./layers/layer_context.cpp:  return tensors[idx]->getVariableRef();
+./layers/layer_context.cpp:  if (!tensors[idx]->hasGradient())
+./layers/layer_context.cpp:      "Requesting gradient for a non-trainable tensor.");
+./layers/layer_context.cpp:  return tensors[idx]->getGradientRef();
+./layers/layer_context.cpp:  if (!tensors[idx]->hasGradient())
+./layers/layer_context.cpp:      "Requesting gradient for a non-trainable tensor.");
+./layers/layer_context.cpp:  return tensors[idx]->getGradientRef();
+./layers/layer_context.cpp:  return tensors[idx]->hasGradient();
+./layers/layer_context.cpp:  return weights[idx]->isDependent();
+./layers/layer_context.cpp:  return weights[idx]->isGradientFirstAccess();
+./layers/layer_context.cpp:  return weights[idx]->isGradientLastAccess();
+./layers/layer_context.cpp:  return weights[idx]->isGradientClipByGlobalNorm();
+./layers/layer_context.cpp:  return tensors[idx]->getName();
+./layers/layer_context.cpp:    vg->setBatchSize(batch);
+./layers/layer_context.cpp:    vg->setBatchSize(batch);
+./layers/layer_context.cpp:  tensors[idx]->setBatchSize(batch);
+./layers/layer_context.cpp:  return outputs[idx]->getGradientRef().isAllocated();
+./layers/layer_context.cpp:    return outputs[idx]->getGradientRef();
+./layers/layer_context.cpp:  return !inputs[0]->getVariable().empty();
+./layers/layer_context.cpp:   * @note a common mistake when using run_context is re-assigning the tensor
+./layers/layer_context.cpp:  if (tensor_map.empty() || !tensor_map[inputs[0]->getName()]) {
+./layers/layer_context.cpp:        tensor_map[val->getName()] = val->getVariableRef().getData();
+./layers/layer_context.cpp:        tensor_map[val->getGradientName()] = val->getGradientRef().getData();
+./layers/layer_context.cpp:  matcher = [this](const Var_Grad *val, bool skip_grad) -> bool {
+./layers/layer_context.cpp:    if (val->getName().empty() ||
+./layers/layer_context.cpp:        (val->hasGradient() && val->getGradientName().empty()))
+./layers/layer_context.cpp:    if (tensor_map.find(val->getName()) == tensor_map.end())
+./layers/layer_context.cpp:       * Disabled because of in-place input layer. Enable this later.
+./layers/layer_context.cpp:       * tensor_map[val->getName()] != val->getVariableRef().getData())
+./layers/layer_context.cpp:        (tensor_map.find(val->getGradientName()) == tensor_map.end()))
+./layers/reduce_mean_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/centroid_knn.h:// SPDX-License-Identifier: Apache-2.0
+./layers/centroid_knn.h: * @brief Centroid KNN layer which takes centroid and do k-nearest neighbor
+./layers/gru.h:// SPDX-License-Identifier: Apache-2.0
+./layers/attention_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/attention_layer.cpp:    throw std::runtime_error("Attention layer needs 2-3 inputs.");
+./layers/dropout.h:// SPDX-License-Identifier: Apache-2.0
+./layers/dropout.h:    epsilon(1e-3) {}
+./layers/dropout.h:   * @todo Enable in-place support once supported by manager
+./layers/positional_encoding_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/multiout_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/tflite_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/acti_func.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/acti_func.cpp:                          Tensor const &incoming_derivative) -> Tensor & {
+./layers/acti_func.cpp:                            Tensor const &incoming_derivative) -> Tensor & {
+./layers/acti_func.cpp:                            Tensor const &incoming_derivative) -> Tensor & {
+./layers/acti_func.cpp:  _act_fn = [activation_fn](Tensor const &x, Tensor &hidden) -> Tensor & {
+./layers/acti_func.cpp:                            Tensor const &incoming_derivative) -> Tensor & {
+./layers/acti_func.cpp:                            Tensor const &incoming_derivative) -> Tensor & {
+./layers/acti_func.cpp:    this->setActivation(tanhFloat, tanhPrime);
+./layers/acti_func.cpp:    this->setActivation(sigmoid, sigmoidPrime);
+./layers/acti_func.cpp:    this->setActivation(softmax, softmaxPrime);
+./layers/acti_func.cpp:    this->setActivation(relu, reluPrime);
+./layers/acti_func.cpp:    this->setActivation(leakyRelu, leakyReluPrime);
+./layers/acti_func.cpp:    this->setActivation(swish, swishPrime);
+./layers/acti_func.cpp:    this->setActivation(gelu, geluPrime);
+./layers/acti_func.cpp:    this->setActivation(no_op, no_op_prime);
+./layers/acti_func.cpp:   * shiftx_logit = logit - max_batch(logit)
+./layers/acti_func.cpp:    saxpy(width, -1, tmp.getData(), 1, ptr, 1);
+./layers/acti_func.cpp:                    (1.0f - output_data[bch_offset + w1]);
+./layers/acti_func.cpp:                -output_data[bch_offset + w2] * output_data[bch_offset + w1];
+./layers/acti_func.cpp:float ActiFunc::sigmoid(float x) { return 1.0f / (1.0f + exp_util(-x)); }
+./layers/acti_func.cpp:  return x * (1.0f - x);
+./layers/acti_func.cpp:  return 2.0 * sigmoid(2.0 * x) - 1.0;
+./layers/acti_func.cpp:  return 1.0f - x * x;
+./layers/acti_func.cpp:  t_out.apply([&](float x) { return 1 - x; }, tmp);
+./layers/acti_func.cpp:                    x * ((2 / sqrt(M_PI)) * exp(-pow(x * tmp, 2))) * tmp);
+./layers/acti_func.cpp:    throw std::runtime_error("Error setting activation layer to work in-place");
+./layers/gru.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/gru.cpp: * h_prev --------d1------->[*]-------d0----->[+]---d0--> h
+./layers/gru.cpp: *          |  |             +-----[1-]------>[*]
+./layers/gru.cpp: *          | [*]<---+ d15   |d5               | d6
+./layers/gru.cpp: *          +- |--+------|---+    |          |    |
+./layers/gru.cpp: *             +---------|--------|----------+    |
+./layers/gru.cpp: *   xs------------------+--------+---------------+
+./layers/gru.cpp:  epsilon(1e-3) {
+./layers/gru.cpp:  // - weight_ih ( input to hidden )
+./layers/gru.cpp:  // weight_ih_dim : [ 1, 1, feature_size, NUMGATE * unit ] -> z, r, g
+./layers/gru.cpp:  // - weight_hh ( hidden to hidden )
+./layers/gru.cpp:  // weight_hh_dim : [ 1, 1, unit, NUM_GATE * unit ] -> z, r, g
+./layers/gru.cpp:      // - bias_h ( input bias, hidden bias are integrate to 1 bias )
+./layers/gru.cpp:      // bias_h_dim : [ 1, 1, 1, NUM_GATE * unit ] -> z, r, g
+./layers/gru.cpp:      // - bias_ih ( input bias )
+./layers/gru.cpp:      // bias_ih_dim : [ 1, 1, 1, NUM_GATE * unit ] -> z, r, g
+./layers/gru.cpp:      // - bias_hh ( hidden bias )
+./layers/gru.cpp:      // bias_hh_dim : [ 1, 1, 1, NUM_GATE * unit ] -> z, r, g
+./layers/gru.cpp:  // h_nx = (1-zt)*gt + zt*h_prev
+./layers/gru.cpp:        prev_hs = oslice.getSharedDataTensor({unit}, (t - 1) * unit);
+./layers/gru.cpp:      temp = zt.multiply(-1.0).add(1.0);
+./layers/gru.cpp:        {unit}, batch * unit * max_timestep + (max_timestep - 1) * unit);
+./layers/gru.cpp:        {unit}, batch * unit * max_timestep + (max_timestep - 1) * unit);
+./layers/gru.cpp:    for (unsigned int t = max_timestep; t-- > 0;) {
+./layers/gru.cpp:        prev_hs = hs_t.getSharedDataTensor({unit}, (t - 1) * unit);
+./layers/gru.cpp:      if (t < max_timestep - 1) {
+./layers/gru.cpp:      zt.multiply(-1.0, dhg);
+./layers/conv1d_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/common_properties.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/common_properties.cpp:  static std::regex allowed("[a-zA-Z0-9][-_./a-zA-Z0-9]*");
+./layers/common_properties.cpp:    /// check if every padding is non-negative integer
+./layers/common_properties.cpp:      unsigned int eff_kernel = (kernel_ - 1) * dilation + 1;
+./layers/common_properties.cpp:      auto out = (input_ + stride - 1) / stride;
+./layers/common_properties.cpp:      auto req_input = (out - 1) * stride + eff_kernel;
+./layers/common_properties.cpp:      return req_input >= input_ ? req_input - input_ : 0;
+./layers/common_properties.cpp:    return {pad_top, pad_vertical - pad_top, pad_left,
+./layers/common_properties.cpp:            pad_horizontal - pad_left};
+./layers/common_properties.cpp:    /// check if every padding is non-negative integer
+./layers/common_properties.cpp:    unsigned int eff_kernel = (kernel - 1) * dilation + 1;
+./layers/common_properties.cpp:    auto out = (input + stride - 1) / stride;
+./layers/common_properties.cpp:    auto req_input = (out - 1) * stride + eff_kernel;
+./layers/common_properties.cpp:    return req_input >= input ? req_input - input : 0;
+./layers/common_properties.cpp:    return {pad_left, pad_horizontal - pad_left};
+./layers/mol_attention_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/mol_attention_layer.h:    mol_props; /**< mol attention layer properties : unit - number of output
+./layers/multi_head_attention_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/multi_head_attention_layer.cpp:  epsilon(1e-3) {
+./layers/multi_head_attention_layer.cpp:    //   attention_mask.setValue(-1e9);
+./layers/multi_head_attention_layer.cpp:    //   attention_mask.multiply_i(-1e9);
+./layers/multiout_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/layer_context.h:// SPDX-License-Identifier: Apache-2.0
+./layers/layer_context.h:    return weights_spec.size() - 1;
+./layers/layer_context.h:    return weights_spec.size() - 1;
+./layers/layer_context.h:    auto prefix_ = private_ ? this->name : this->prefix;
+./layers/layer_context.h:    return tensors_spec.size() - 1;
+./layers/layer_context.h:    return tensors_spec.size() - 1;
+./layers/layer_context.h:   * @brief   check if the layer is expected to run in-place
+./layers/layer_context.h:   * @return true if in-place, else false
+./layers/layer_context.h:  bool in_place;             /**< if the layer is expected to run in-place */
+./layers/layer_context.h:    tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
+./layers/layer_context.h:   * @param in_place_ execution in-place of the layer
+./layers/layer_context.h:   * @return Read-only output grad tensor, if derivative does not have
+./layers/layer_context.h:   * accessible for non-trainable weights as well. This is in terms of execution
+./layers/layer_context.h:   * accessible for non-trainable weights as well. This is in terms of execution
+./layers/layer_context.h:   * @note loss value is only used for loss layers. For non-loss layers, setting
+./layers/layer_context.h:   * @brief   check if the layer is expected to run in-place
+./layers/layer_context.h:   * @return true if in-place, else false
+./layers/layer_context.h:  bool in_place; /**< if the layer is expected to run in-place */
+./layers/preprocess_translate_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/rnn.h:// SPDX-License-Identifier: Apache-2.0
+./layers/centroid_knn.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/centroid_knn.cpp:    return -a.subtract(b).l2norm();
+./layers/conv2d_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/conv2d_layer.cpp:  unsigned eff_k_height = (k_height - 1) * dilation[0] + 1;
+./layers/conv2d_layer.cpp:  unsigned eff_k_width = (k_width - 1) * dilation[1] + 1;
+./layers/conv2d_layer.cpp:  int h_stride_end = im_eff_height - eff_k_height - pt;
+./layers/conv2d_layer.cpp:  int w_stride_end = im_eff_width - eff_k_width - pl;
+./layers/conv2d_layer.cpp:  for (int hs = -pt; hs <= h_stride_end; hs += hstride) {
+./layers/conv2d_layer.cpp:    for (int ws = -pl; ws <= w_stride_end; ws += wstride) {
+./layers/conv2d_layer.cpp:  //   out_height -= 2;
+./layers/conv2d_layer.cpp:  //     for (unsigned int hs = 0; hs <= height - eff_k_height; hs +=
+./layers/conv2d_layer.cpp:  //       for (unsigned int ws = 0; ws <= width - eff_k_width; ws +=
+./layers/conv2d_layer.cpp:  //             float val = in.getValue(0, c, h - ph, w - pw);
+./layers/conv2d_layer.cpp:  unsigned int eff_k_height = (k_height - 1) * dilation[0] + 1;
+./layers/conv2d_layer.cpp:  unsigned int eff_k_width = (k_width - 1) * dilation[1] + 1;
+./layers/conv2d_layer.cpp:  unsigned int out_height = (height - eff_k_height) / mstride[0] + 1;
+./layers/conv2d_layer.cpp:  unsigned int out_width = (width - eff_k_width) / mstride[1] + 1;
+./layers/conv2d_layer.cpp:  int h_stride_end = height - eff_k_height - pt;
+./layers/conv2d_layer.cpp:  int w_stride_end = width - eff_k_width - pl;
+./layers/conv2d_layer.cpp:  for (int hs = -pt; hs <= h_stride_end; hs += mstride[0]) {
+./layers/conv2d_layer.cpp:        for (int ws = -pl; ws <= w_stride_end; ws += mstride[1]) {
+./layers/conv2d_layer.cpp:  unsigned int eff_k_height = (kernel_size[0] - 1) * dilation[0] + 1;
+./layers/conv2d_layer.cpp:  unsigned int eff_k_width = (kernel_size[1] - 1) * dilation[1] + 1;
+./layers/conv2d_layer.cpp:  out_dim.height((eff_in_height - eff_k_height) / stride[0] + 1);
+./layers/conv2d_layer.cpp:  out_dim.width((eff_in_width - eff_k_width) / stride[1] + 1);
+./layers/conv2d_layer.cpp:  NNTR_THROW_IF(eff_in_height - padding[0] - kernel_size[0] > IM ||
+./layers/conv2d_layer.cpp:                  eff_in_width - padding[2] - kernel_size[1] > IM,
+./layers/conv2d_layer.cpp:   *                        +------|------|------+
+./layers/conv2d_layer.cpp:   *                        |------|------|------|
+./layers/conv2d_layer.cpp:   * [filter_size (height)] |------|------|------|
+./layers/conv2d_layer.cpp:   *                        |------|------|------|
+./layers/conv2d_layer.cpp:   *                        +------|------|------+
+./layers/conv2d_layer.cpp:   *                      +-|-|-|-|      |-|-|-|-+
+./layers/conv2d_layer.cpp:   *   -> [Channel ( = filter_size = output_dim.channel )]
+./layers/conv2d_layer.cpp:  /// filter_kernel^T X derivaitive  -> column matrix
+./layers/conv2d_layer.cpp:  /// input -(im2col)-> column_matrix -> filter x (column_matrix) = output
+./layers/reshape_layer.h:// SPDX-License-Identifier: Apache-2.0
+./layers/time_dist.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/time_dist.cpp:  dist_layer->finalize(dist_context);
+./layers/time_dist.cpp:  if (dist_layer->requireLabel() &&
+./layers/time_dist.cpp:                   dist_layer->requireLabel() &&
+./layers/time_dist.cpp:    if (dist_layer->requireLabel() &&
+./layers/time_dist.cpp:    dist_layer->forwarding(dist_context, training);
+./layers/time_dist.cpp:    dist_layer->calcDerivative(dist_context);
+./layers/time_dist.cpp:  // Even if the dist_layer->getNumWeights() == 0, We do transpose here
+./layers/time_dist.cpp:    dist_layer->calcGradient(dist_context);
+./layers/time_dist.cpp:    dist_layer->setBatch(dist_context, batch);
+./layers/lstmcell_core.h:// SPDX-License-Identifier: Apache-2.0
+./layers/time_dist.h:// SPDX-License-Identifier: Apache-2.0
+./layers/time_dist.h:    dist_layer->exportTo(exporter, method);
+./layers/time_dist.h:    return dist_layer->supportBackwarding();
+./layers/time_dist.h:      dist_layer->setProperty(values);
+./layers/time_dist.h:    return dist_layer->requireLabel();
+./layers/layer_normalization_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/layer_normalization_layer.cpp:  /** caches the deviation -> input - avg(input) */
+./layers/layer_normalization_layer.cpp:  variance.pow(-0.5f, inv_std_dev);
+./layers/nnstreamer_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/preprocess_flip_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/preprocess_flip_layer.cpp:                   *input_.getAddress(b, c, h, width - w - 1));
+./layers/preprocess_flip_layer.cpp:                   *input_.getAddress(b, c, height - h - 1, w));
+./layers/#concat_layer.cpp#:meson// SPDX-License-Identifier: Apache-2.0
+./layers/#concat_layer.cpp#:           "along non-concat dimension";
+./layers/rnncell.h:// SPDX-License-Identifier: Apache-2.0
+./layers/pooling2d_layer.cpp:// SPDX-License-Identifier: Apache-2.0
+./layers/pooling2d_layer.cpp:  NNTR_THROW_IF(eff_in_height - pt - pool_size[0] > IM ||
+./layers/pooling2d_layer.cpp:                  eff_in_width - pl - pool_size[1] > IM,
+./layers/pooling2d_layer.cpp:  out_dim.height((eff_in_height - pool_size[0]) / stride[0] + 1);
+./layers/pooling2d_layer.cpp:  out_dim.width((eff_in_width - pool_size[1]) / stride[1] + 1);
+./layers/pooling2d_layer.cpp:   * // clang-format off
+./layers/pooling2d_layer.cpp:   * // clang-format on
+./layers/pooling2d_layer.cpp:          /// pool_helper = -1 means the max idx was at the padding, so no need
+./layers/pooling2d_layer.cpp:          if (*iter != -1) {
+./layers/pooling2d_layer.cpp:    int height_stride_end = height - p_height + pt;
+./layers/pooling2d_layer.cpp:    int width_stride_end = width - p_width + pl;
+./layers/pooling2d_layer.cpp:        for (int j = -pt; j <= height_stride_end; j += stride[0]) {
+./layers/pooling2d_layer.cpp:          for (int k = -pl; k <= width_stride_end; k += stride[1]) {
+./layers/pooling2d_layer.cpp:      int cur_max_idx = -1;
+./layers/pooling2d_layer.cpp:      int cnt = (eff_end_h - eff_start_h) * (eff_end_w - eff_start_w);
+./layers/pooling2d_layer.cpp:  int height_stride_end = height - patch_height - pt;
+./layers/pooling2d_layer.cpp:  int width_stride_end = width - patch_width - pl;
+./layers/pooling2d_layer.cpp:    for (int j = -pt; j <= height_stride_end; j += stride[0]) {
+./layers/pooling2d_layer.cpp:      for (int k = -pl; k <= width_stride_end; k += stride[1]) {
+./models/circle_plus/circle_plus_generated.h:             "Non-compatible flatbuffers version included");
+./models/circle_plus/circle_plus_generated.h:  if (values->size() != types->size()) return false;
+./models/circle_plus/circle_plus_generated.h:  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+./models/circle_plus/circle_plus_generated.h:        verifier,  values->Get(i), types->GetEnum<LayerOptions>(i))) {
+./models/circle_plus/circle_plus_generated.h:  if (values->size() != types->size()) return false;
+./models/circle_plus/circle_plus_generated.h:  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+./models/circle_plus/circle_plus_generated.h:        verifier,  values->Get(i), types->GetEnum<LROptions>(i))) {
+./models/circle_plus/circle_plus_generated.h:  if (values->size() != types->size()) return false;
+./models/circle_plus/circle_plus_generated.h:  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+./models/circle_plus/circle_plus_generated.h:        verifier,  values->Get(i), types->GetEnum<OptimizerOptions>(i))) {
+./models/circle_plus/circle_plus_generated.h:  if (values->size() != types->size()) return false;
+./models/circle_plus/circle_plus_generated.h:  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+./models/circle_plus/circle_plus_generated.h:        verifier,  values->Get(i), types->GetEnum<LossOptions>(i))) {
+Binary file ./models/circle_plus/test matches
+./models/circle_plus/nntrainer.fbs://Tensor Mapping : name - index
+./models/circle_plus/circle_plus.fbs://Tensor Mapping : name - index
+./models/circle_plus/test.cpp:    std::cout << model->name()->c_str()<<" " <<model->epochs() <<" " <<model->batch_size() <<std::endl;
+./models/circle_plus/test.cpp:    std::cout << model->name()->c_str()<<" " <<model->epochs() <<" " <<model->batch_size() <<std::endl;
+./models/model_common_properties.cpp:// SPDX-License-Identifier: Apache-2.0
+./models/model_loader.cpp:// SPDX-License-Identifier: Apache-2.0
+./models/model_loader.cpp:    opt_wrapped->setLearningRateScheduler(std::move(lrs));
+./models/model_loader.cpp:    model.opt->setProperty(optimizer_prop);
+./models/model_loader.cpp:  auto try_parse_datasetsection_for_backward_compatibility = [&]() -> int {
+./models/model_loader.cpp:                             bool required) -> int {
+./models/model_loader.cpp:        model.data_buffers[static_cast<int>(dt)]->setProperty({bufsizepros});
+./models/model_loader.cpp:                                       DatasetModeType type) -> int {
+./models/model_loader.cpp:      db->setProperty(properties);
+./models/model_loader.cpp:  properties.reserve(num_entries - 1);
+./models/model_loader.cpp:    auto graph_representation = ini_interpreter->deserialize(ini_file);
+./models/model_loader.cpp:  model_file_context->setWorkingDirectory(base_path);
+./models/execution_mode.h:// SPDX-License-Identifier: Apache-2.0
+./models/model_loader.h:// SPDX-License-Identifier: Apache-2.0
+./models/model_loader.h:    return model_file_context->getWorkingPath(app_context_resolved_path);
+./models/dynamic_training_optimization.h:// SPDX-License-Identifier: Apache-2.0
+./models/dynamic_training_optimization.h: * updating the weights (which can be non-trivial with bigger weights and
+./models/neuralnet.h: *   http://www.apache.org/licenses/LICENSE-2.0
+./models/neuralnet.h:#include <nntrainer-api-common.h>
+./models/neuralnet.h:    std::vector<NodeType>; /** topological sorted, iterable 1-D list of nodes */
+./models/neuralnet.h:  float getLearningRate() { return opt->getLearningRate(0); };
+./models/neuralnet.h:   * @brief     Update graph to make batch normalization in-place
+./models/neuralnet.h:   * @brief Enable dynamic fine-tuning optimization
+./models/neuralnet.h:   * @param mode dynamic fine-tuning optimization mode. Supported modes are
+./models/neuralnet.h:   * @brief Disable dynamic fine-tuning optimization
+./models/neuralnet.h:    // clang-format off
+./models/neuralnet.h:    // clang-format on
+./models/neuralnet.h:  DynamicTrainingOptimization dynamic_training_opt; /**< Dynamic fine-tuning
+./models/neuralnet.cpp: *   http://www.apache.org/licenses/LICENSE-2.0
+./models/neuralnet.cpp:    graph_representation = realizer->realize(graph_representation);
+./models/neuralnet.cpp:      node->setProperty({"clip_grad_by_norm=" + to_string(prop)});
+./models/neuralnet.cpp:    opt->finalize();
+./models/neuralnet.cpp:        return opt->getOptimizerVariableDim(dim);
+./models/neuralnet.cpp:                              bool training) -> void {
+./models/neuralnet.cpp:    PROFILE_MEM_ANNOTATE("Forwarding for layer: " + node->getName());
+./models/neuralnet.cpp:    auto f = std::get<0>(node->getExecutionOrder());
+./models/neuralnet.cpp:    node->forwarding(training);
+./models/neuralnet.cpp:  NNTR_THROW_IF(input[0]->batch() != current_batch ||
+./models/neuralnet.cpp:                  (!label.empty() && label[0]->batch() != current_batch),
+./models/neuralnet.cpp:    << " input_batch: " << input[0]->batch()
+./models/neuralnet.cpp:    << " label_batch: " << label[0]->batch()
+./models/neuralnet.cpp:                              int iteration) -> void {
+./models/neuralnet.cpp:    model_graph.flushCacheExcept(std::get<1>(node->getExecutionOrder()));
+./models/neuralnet.cpp:    PROFILE_MEM_ANNOTATE("CalcGradient: " + node->getName());
+./models/neuralnet.cpp:    if (node->getTrainable()) {
+./models/neuralnet.cpp:        node->calcGradient();
+./models/neuralnet.cpp:      // auto &layer = node->getObject();
+./models/neuralnet.cpp:      //   layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0],
+./models/neuralnet.cpp:        node->calcGradient();
+./models/neuralnet.cpp:    model_graph.flushCacheExcept(std::get<2>(node->getExecutionOrder()));
+./models/neuralnet.cpp:    PROFILE_MEM_ANNOTATE("CalcDerivative: " + node->getName());
+./models/neuralnet.cpp:    if (node->needsCalcDerivative())
+./models/neuralnet.cpp:      node->calcDerivative();
+./models/neuralnet.cpp:    model_graph.flushCacheExcept(std::get<3>(node->getExecutionOrder()));
+./models/neuralnet.cpp:    PROFILE_MEM_ANNOTATE("ApplyGradient: " + node->getName());
+./models/neuralnet.cpp:                                          opt_->getLearningRate(iteration));
+./models/neuralnet.cpp:          opt_->applyGradient(opt_context);
+./models/neuralnet.cpp:    [opt_ = opt.get()](Weight &w, int iteration) -> void {
+./models/neuralnet.cpp:                                    opt_->getLearningRate(iteration));
+./models/neuralnet.cpp:    opt_->applyGradient(opt_context);
+./models/neuralnet.cpp:      (*iter)->save(model_file);
+./models/neuralnet.cpp:    if (opt && istrequal(opt->getType(), "adam")) {
+./models/neuralnet.cpp:        (*iter)->save(model_file, true);
+./models/neuralnet.cpp:      (*iter)->read(model_file);
+./models/neuralnet.cpp:      if (opt && istrequal(opt->getType(), "adam")) {
+./models/neuralnet.cpp:            (*iter)->read(model_file, true);
+./models/neuralnet.cpp:    loss += (*iter)->getLoss();
+./models/neuralnet.cpp:      s.setEntry("type", obj_ptr->getType());
+./models/neuralnet.cpp:    return buffer && buffer->isSerializable(
+./models/neuralnet.cpp:    if (input_dim[dim] != X[dim]->getDim()) {
+./models/neuralnet.cpp:      ss << X[dim]->getDim();
+./models/neuralnet.cpp:  if (model_graph.getBatchSize() != X[0]->batch()) {
+./models/neuralnet.cpp:    model_graph.setBatchSize(X[0]->batch());
+./models/neuralnet.cpp:      (*iter)->clearOptVar();
+./models/neuralnet.cpp:      buffer->startFetchWorker(in_dims, label_dims, shuffle);
+./models/neuralnet.cpp:      ScopedView<Iteration> iter_view = buffer->fetch();
+./models/neuralnet.cpp:                << " - Training Loss: " << stat.loss;
+./models/neuralnet.cpp:      ml_logi("# %d / %d - Training Loss: %f", epoch_idx, getEpochs(),
+./models/neuralnet.cpp:              << "% - Validation Loss : " << stat.loss << " ]";
+./models/neuralnet.cpp:    ml_logi("[ Accuracy: %.2f %% - Validataion Loss: %.5f", stat.accuracy,
+./models/neuralnet.cpp:      --epoch_idx;
+./models/neuralnet.cpp:        l_node->cloneConfiguration()));
+./models/neuralnet.cpp:  this->data_buffers[static_cast<int>(mode)] = data_buffer;
+./models/neuralnet.cpp:    nodes.push_back(node->cloneConfiguration());
+./models/neuralnet.cpp:    nodes = realizer->realize(nodes);
+./models/neuralnet.cpp:                                           : str.substr(0, column_width - 1);
+./models/neuralnet.cpp:        if (iter->getInputDimensions().empty()) {
+./models/neuralnet.cpp:          dim_property.set(iter->getInputDimensions()[0]);
+./models/neuralnet.cpp:          iter->getInputConnections();
+./models/neuralnet.cpp:          out, {iter->getName(), iter->getType(), first_dim, first_input_name});
+./models/neuralnet.cpp:          dim_property.set(iter->getInputDimensions()[i]);
+./models/neuralnet.cpp:                           iter == model_graph.cend() - 1 ? '=' : '-')
+./models/neuralnet.cpp:          return node->hasInputShapeProperty() or
+./models/neuralnet.cpp:                               return node->getName() == conn.getName();
+./models/neuralnet.cpp:          iter->getInputConnections();
+./models/neuralnet.cpp:        //          : (iter - 1)->getName())
+./models/neuralnet.cpp:        print_graph_layer_info(out, {iter->getName(), iter->getType(), "", ""});
+./models/neuralnet.cpp:                           iter == model_graph.cend() - 1 ? '=' : '-')
+./models/neuralnet.cpp:    (*iter)->printPreset(out, layerPrintPreset);
+./models/neuralnet.cpp:    fn(*ln, std::forward<RunLayerContext &>(ln->getRunContext()), user_data);
+./models/#neuralnet.h#: *   http://www.apache.org/licenses/LICENSE-2.0
+./models/#neuralnet.h#:#include <nntrainer-api-common.h>
+./models/#neuralnet.h#:    std::vector<NodeType>; /** topological sorted, iterable 1-D list of nodes */
+./models/#neuralnet.h#:  float getLearningRate() { return opt->getLearningRate(0); };
+./models/#neuralnet.h#:   * @brief     Update graph to make batch normalization in-place
+./models/#neuralnet.h#:   * @brief Enable dynamic fine-tuning optimization
+./models/#neuralnet.h#:   * @param mode dynamic fine-tuning optimization mode. Supported modes are
+./models/#neuralnet.h#:   * @brief Disable dynamic fine-tuning optimization
+./models/#neuralnet.h#:    // clang-format off
+./models/#neuralnet.h#:    // clang-format on
+./models/#neuralnet.h#:  DynamicTrainingOptimization dynamic_training_opt; /**< Dynamic fine-tuning
+./models/dynamic_training_optimization.cpp:// SPDX-License-Identifier: Apache-2.0
+./models/dynamic_training_optimization.cpp:  epsilon(1e-7),
+./models/dynamic_training_optimization.cpp:  return checkIfApply(reduced_ratio, (float)opt->getLearningRate(iteration));
+./models/dynamic_training_optimization.cpp:  float reduced_derivative = reduce_op(output->getGradientRef());
+./models/dynamic_training_optimization.cpp:  float reduced_input = reduce_op(input->getVariableRef());
+./models/model_common_properties.h:// SPDX-License-Identifier: Apache-2.0
+./nntrainer_error.h:// SPDX-License-Identifier: Apache-2.0
+./nntrainer_error.h:#define ML_ERROR_BAD_ADDRESS (-EFAULT)
+./nntrainer_error.h:#define ML_ERROR_RESULT_OUT_OF_RANGE (-ERANGE)
+./nntrainer_error.h:#include <ml-api-common.h>
+./nntrainer_error.h: https://gitlab.freedesktop.org/dude/gst-plugins-base/commit/89095e7f91cfbfe625ec2522da49053f1f98baf8
+./nntrainer_error.h:#define _ERROR_UNKNOWN (-1073741824LL)
+./nntrainer_error.h:#define TIZEN_ERROR_PERMISSION_DENIED (-EACCES)
+./nntrainer_error.h:#define TIZEN_ERROR_OUT_OF_MEMORY (-ENOMEM)
+./nntrainer_error.h:  ML_ERROR_INVALID_PARAMETER = -EINVAL, /**< Invalid parameter */
+./nntrainer_error.h:    -EAGAIN, /**< The pipeline is not ready, yet (not negotiated, yet) */
+./nntrainer_error.h:  ML_ERROR_PERMISSION_DENIED = -EACCES, /**< Permission denied */
+./nntrainer_error.h:  ML_ERROR_OUT_OF_MEMORY = -ENOMEM,     /**< Out of memory (Since 6.0) */
+./nntrainer_logger.cpp: *   http://www.apache.org/licenses/LICENSE-2.0
+./nntrainer_logger.cpp:                std::chrono::system_clock::now().time_since_epoch() - start)
+./nntrainer_logger.cpp:    ss << std::dec << (now.tm_year + 1900) << '-' << std::setfill('0')
+./nntrainer_logger.cpp:       << std::setw(2) << (now.tm_mon + 1) << '-' << std::setfill('0')
+./nntrainer_logger.cpp:      n += abs(final_n - n + 1);
+./nntrainer_logger.h: *   http://www.apache.org/licenses/LICENSE-2.0
+./nntrainer_log.h: *   http://www.apache.org/licenses/LICENSE-2.0
+./optimizers/plugged_optimizer.h:// SPDX-License-Identifier: Apache-2.0
+./optimizers/plugged_optimizer.h:      dynamic_cast<nntrainer::Optimizer *>(pluggable->createfunc())),
+./optimizers/plugged_optimizer.h:    destroy_func(pluggable->destroyfunc) {
+./optimizers/plugged_optimizer.h:    return optimizer_devel->getDefaultLearningRate();
+./optimizers/plugged_optimizer.h:    optimizer_devel->applyGradient(context);
+./optimizers/plugged_optimizer.h:    optimizer_devel->setProperty(values);
+./optimizers/plugged_optimizer.h:  void finalize() override { optimizer_devel->finalize(); }
+./optimizers/plugged_optimizer.h:  void read(std::ifstream &file) override { optimizer_devel->read(file); }
+./optimizers/plugged_optimizer.h:  void save(std::ofstream &file) override { optimizer_devel->save(file); }
+./optimizers/plugged_optimizer.h:    return optimizer_devel->getOptimizerVariableDim(dim);
+./optimizers/plugged_optimizer.h:    return optimizer_devel->getType();
+./optimizers/lr_scheduler_exponential.h:// SPDX-License-Identifier: Apache-2.0
+./optimizers/lr_scheduler.h:// SPDX-License-Identifier: Apache-2.0
+./optimizers/lr_scheduler.h:   * @note this is non-const function intentionally.
+./optimizers/lr_scheduler.h:   * - learning_rate : float
+./optimizers/lr_scheduler.h:   * - learning_rate : float
+./optimizers/lr_scheduler.h:   * - decay_rate : float,
+./optimizers/lr_scheduler.h:   * - decay_steps : float,
+./optimizers/lr_scheduler.h:   * @details   This function accepts vector of properties in the format -
+./optimizers/lr_scheduler.h:  ptr->setProperty(props);
+./optimizers/sgd.cpp:// SPDX-License-Identifier: Apache-2.0
+./optimizers/adam.h:// SPDX-License-Identifier: Apache-2.0
+./optimizers/lr_scheduler_step.h:// SPDX-License-Identifier: Apache-2.0
+./optimizers/lr_scheduler_constant.cpp:// SPDX-License-Identifier: Apache-2.0
+./optimizers/optimizer_context.h:// SPDX-License-Identifier: Apache-2.0
+./optimizers/optimizer_devel.h:// SPDX-License-Identifier: Apache-2.0
+./optimizers/optimizer_devel.h:  ptr->setProperty(props);
+./optimizers/adam.cpp:// SPDX-License-Identifier: Apache-2.0
+./optimizers/adam.cpp:  eps.set(1.0e-7f);
+./optimizers/adam.cpp:    return 1.0f - pow(f, iteration + 1);
+./optimizers/adam.cpp:  float biasCorrection1 = 1 - pow(beta1, iteration + 1);
+./optimizers/adam.cpp:  float biasCorrection2 = 1 - pow(beta2, iteration + 1);
+./optimizers/adam.cpp:  wm.add_i(x_grad, 1.0f - beta1);
+./optimizers/adam.cpp:  wv.add_i(x_grad.multiply(x_grad), 1.0f - beta2);
+./optimizers/optimizer_context.cpp:// SPDX-License-Identifier: Apache-2.0
+./optimizers/optimizer_context.cpp:  return weight->getVariableRef();
+./optimizers/optimizer_context.cpp:  return weight->getGradientRef();
+./optimizers/optimizer_context.cpp:  return weight->getOptimizerVariableRef(idx);
+./optimizers/optimizer_context.cpp:  weight->applyGradient(lr);
+./optimizers/lr_scheduler_step.cpp:// SPDX-License-Identifier: Apache-2.0
+./optimizers/lr_scheduler_step.cpp:    return learning_rates[upper - iterations.begin()];
+./optimizers/optimizer_wrapped.cpp:// SPDX-License-Identifier: Apache-2.0
+./optimizers/optimizer_wrapped.cpp:  opt_wrapped->setProperty(properties);
+./optimizers/optimizer_wrapped.cpp:  std::get<props::LearningRate>(props).set(optimizer->getDefaultLearningRate());
+./optimizers/optimizer_wrapped.cpp:  return optimizer->getType();
+./optimizers/optimizer_wrapped.cpp:  optimizer->setProperty(remain_props);
+./optimizers/optimizer_wrapped.cpp:  return lr_sched->getLearningRate(iteration);
+./optimizers/optimizer_wrapped.cpp:  optimizer->applyGradient(context);
+./optimizers/optimizer_wrapped.cpp:  optimizer->exportTo(exporter, method);
+./optimizers/optimizer_wrapped.cpp:  lr_sched->exportTo(exporter, method);
+./optimizers/optimizer_wrapped.cpp:        lr_sched->setProperty({"decay_rate=" + std::to_string(props_dr.get())});
+./optimizers/optimizer_wrapped.cpp:        lr_sched->setProperty(
+./optimizers/optimizer_wrapped.cpp:    lr_sched->setProperty({"learning_rate=" + std::to_string(props_lr.get())});
+./optimizers/optimizer_wrapped.cpp:  lr_sched->finalize();
+./optimizers/optimizer_wrapped.cpp:  optimizer->finalize();
+./optimizers/optimizer_wrapped.cpp:void OptimizerWrapped::read(std::ifstream &file) { optimizer->read(file); }
+./optimizers/optimizer_wrapped.cpp:void OptimizerWrapped::save(std::ofstream &file) { optimizer->save(file); }
+./optimizers/optimizer_wrapped.cpp:  return optimizer->getOptimizerVariableDim(dim);
+./optimizers/optimizer_devel.cpp:// SPDX-License-Identifier: Apache-2.0
+./optimizers/sgd.h:// SPDX-License-Identifier: Apache-2.0
+./optimizers/lr_scheduler_exponential.cpp:// SPDX-License-Identifier: Apache-2.0
+./optimizers/optimizer_wrapped.h:// SPDX-License-Identifier: Apache-2.0
+./optimizers/optimizer_wrapped.h:   * - learning_rate : float
+./optimizers/optimizer_wrapped.h:   * - decay_rate : float,
+./optimizers/optimizer_wrapped.h:   * - decay_steps : float,
+./optimizers/optimizer_wrapped.h:   * - beta1 : float,
+./optimizers/optimizer_wrapped.h:   * - beta2 : float,
+./optimizers/optimizer_wrapped.h:   * - epsilon : float,
+./optimizers/optimizer_wrapped.h:   * @details   This function accepts vector of properties in the format -
+./optimizers/lr_scheduler_constant.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/blas_interface.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/task_executor.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/task_executor.h:   * @brief Clean all non-running tasks from managed list
+./tensor/task_executor.h:    if (task->started())
+./tensor/task_executor.h:    auto work = task->getWork();
+./tensor/task_executor.h:    auto data = task->getData();
+./tensor/task_executor.h:    task->setState(Task::State::PROCESSING);
+./tensor/tensor_pool.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/tensor_pool.h:  size_t size() { return mem_pool->size(); }
+./tensor/tensor_pool.h:  size_t minMemoryRequirement() { return mem_pool->minMemoryRequirement(); }
+./tensor/tensor_pool.h:  bool isAllocated() const { return mem_pool->isAllocated(); }
+./tensor/optimized_v1_planner.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/optimized_v1_planner.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/optimized_v1_planner.cpp:  for (unsigned int exec = exec_start->first; exec <= exec_end->second;
+./tensor/optimized_v1_planner.cpp:            [](auto const &v1, auto const &v2) -> int {
+./tensor/optimized_v1_planner.cpp:    while (!sorted_req.empty() && sorted_req.back()->end <= req.start)
+./tensor/optimized_v1_planner.cpp:    for (int idx = sorted_req.size() - 1; idx >= 0; idx--) {
+./tensor/optimized_v1_planner.cpp:      if (sr->end <= req.start && sr->size == req.size) {
+./tensor/optimized_v1_planner.cpp:        req.offset = sr->offset;
+./tensor/optimized_v1_planner.cpp:      offset = sorted_req.back()->offset + sorted_req.back()->size;
+./tensor/tensor_dim.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/tensor_dim.cpp:  int shift_size = MAXDIM - dims.size();
+./tensor/tensor_dim.cpp:    setTensorDim(MAXDIM - cur_dim + cn, std::stoul((*i).str()));
+./tensor/tensor_dim.cpp:  int dirs[MAXDIM - 1];
+./tensor/tensor_dim.cpp:  if (this->format != rhs.format)
+./tensor/tensor_dim.cpp:    if (this->dim[i] != rhs.dim[i]) {
+./tensor/tensor_dim.cpp:  auto get_axis = [dynamic, this](unsigned int axis) -> int {
+./tensor/tensor_dim.cpp:    if (dynamic && dyn_dim_flag[MAXDIM - axis - 1]) {
+./tensor/tensor_dim.cpp:      return -1;
+./tensor/tensor_dim.cpp:    /// ex) 3:5:1:1 -> 3:5, we are setting eff_dim_flag to 0b1100
+./tensor/tensor_dim.cpp:    if (eff_dim_flag[MAXDIM - i - 1]) {
+./tensor/optimized_v3_planner.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/optimized_v3_planner.cpp:            [](auto const &v1, auto const &v2) -> int {
+./tensor/optimized_v3_planner.cpp:              return v1->offset < v2->offset;
+./tensor/optimized_v3_planner.cpp:    size_t top = sr->offset + sr->size;
+./tensor/optimized_v3_planner.cpp:    if (sr->offset > bottom) {
+./tensor/optimized_v3_planner.cpp:      vacant.push_back(std::make_pair(bottom, sr->offset));
+./tensor/optimized_v3_planner.cpp:  for (unsigned int exec = exec_start->first; exec <= exec_end->second;
+./tensor/optimized_v3_planner.cpp:            [](auto const &v1, auto const &v2) -> int {
+./tensor/optimized_v3_planner.cpp:                     [req](auto elem) { return elem->end <= req.start; }),
+./tensor/optimized_v3_planner.cpp:      if (vacant[idx].second - vacant[idx].first >= req.size) {
+./tensor/basic_planner.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/basic_planner.cpp:      throw std::runtime_error("Memory requested for non-valid duration.");
+./tensor/cache_elem.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/cache_elem.cpp:  void *buf = device->getBuffer(offset, length, alloc_only);
+./tensor/cache_elem.cpp:  mem_data->setAddr((float *)buf);
+./tensor/cache_elem.cpp:  mem_data->setValid(true);
+./tensor/cache_elem.cpp:  msg += device->getDevicePath() + ") #" + std::to_string(id);
+./tensor/cache_elem.cpp:  void *buf = (void *)mem_data->getAddr();
+./tensor/cache_elem.cpp:  device->putBuffer(buf, dealloc_only);
+./tensor/cache_elem.cpp:  mem_data->setAddr(nullptr);
+./tensor/cache_elem.cpp:  mem_data->setValid(false);
+./tensor/optimized_v2_planner.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/optimized_v2_planner.cpp:  for (unsigned int exec = exec_start->first; exec <= exec_end->second;
+./tensor/optimized_v2_planner.cpp:  requests.reserve(memory_size.size() - n_wgrad);
+./tensor/optimized_v2_planner.cpp:            [](auto const &v1, auto const &v2) -> int {
+./tensor/optimized_v2_planner.cpp:    while (!sorted_req.empty() && sorted_req.back()->end <= req.start)
+./tensor/optimized_v2_planner.cpp:    for (int idx = sorted_req.size() - 1; idx >= 0; idx--) {
+./tensor/optimized_v2_planner.cpp:      if (sr->end <= req.start && sr->size == req.size) {
+./tensor/optimized_v2_planner.cpp:        req.offset = sr->offset;
+./tensor/optimized_v2_planner.cpp:      offset = sorted_req.back()->offset + sorted_req.back()->size;
+./tensor/optimized_v2_planner.cpp:      [](auto const &v1, auto const &v2) -> int { return v1.size > v2.size; });
+./tensor/optimized_v2_planner.cpp:        if (sr.mem_req->size >= req.size) {
+./tensor/optimized_v2_planner.cpp:          req.offset = sr.mem_req->offset;
+./tensor/optimized_v2_planner.cpp:        offset = wgrad_sorted_req.back().mem_req->offset +
+./tensor/optimized_v2_planner.cpp:                 wgrad_sorted_req.back().mem_req->size;
+./tensor/tensor_pool.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/tensor_pool.cpp:  NNTR_THROW_IF(spec.tensor->getDim().getDataLen() <
+./tensor/tensor_pool.cpp:    << " source tensor: " << spec.tensor->getDim().getDataLen()
+./tensor/tensor_pool.cpp:    << " name: " << spec.tensor->getName();
+./tensor/tensor_pool.cpp:  auto parent_idx = name_map.at(spec.tensor->getName());
+./tensor/tensor_pool.cpp:  mem_pool->clear();
+./tensor/tensor_pool.cpp:    if (!details || details->lifespan == TensorLifespan::UNMANAGED ||
+./tensor/tensor_pool.cpp:        details->exec_order.empty()) {
+./tensor/tensor_pool.cpp:    details->token = 0;
+./tensor/tensor_pool.cpp:    for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
+./tensor/tensor_pool.cpp:      if (details->exec_order[idx] >= start_order)
+./tensor/tensor_pool.cpp:        validity_start = std::min(validity_start, details->exec_order[idx]);
+./tensor/tensor_pool.cpp:      if (details->exec_order[idx] > old_end_order &&
+./tensor/tensor_pool.cpp:          details->exec_order[idx] != PERSIST_END_ORDER) {
+./tensor/tensor_pool.cpp:        details->exec_order[idx] = PERSIST_END_ORDER - 1;
+./tensor/tensor_pool.cpp:    for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
+./tensor/tensor_pool.cpp:      if (details->exec_order[idx] == PERSIST_END_ORDER) {
+./tensor/tensor_pool.cpp:        details->exec_order[idx] = validity_end;
+./tensor/tensor_pool.cpp:      if (details->exec_order[idx] <= end_order) {
+./tensor/tensor_pool.cpp:        validity_end = std::max(validity_end, details->exec_order[idx]);
+./tensor/tensor_pool.cpp:    if (isTensorLongTerm(details->lifespan)) {
+./tensor/tensor_pool.cpp:    details->token = mem_pool->requestMemory(
+./tensor/tensor_pool.cpp:      spec.tensor->bytes(), validity_start, validity_end + 1,
+./tensor/tensor_pool.cpp:      details->exec_order, details->lifespan, spec.is_weight_grad);
+./tensor/tensor_pool.cpp:    if (details->token == 0)
+./tensor/tensor_pool.cpp:    bytes_requested += spec.tensor->bytes();
+./tensor/tensor_pool.cpp:    double efficiency = mem_pool->planLayout(planner);
+./tensor/tensor_pool.cpp:  pool[name_map[name]].tensor->updateBatch(batch);
+./tensor/tensor_pool.cpp:  mem_pool->allocate();
+./tensor/tensor_pool.cpp:    if (!details || details->token == 0) {
+./tensor/tensor_pool.cpp:    spec.tensor->setData(mem_pool->getMemory(details->token), 0, true);
+./tensor/tensor_pool.cpp:    cache_loader->init();
+./tensor/tensor_pool.cpp:    cache_loader->finish();
+./tensor/tensor_pool.cpp:  mem_pool->deallocate();
+./tensor/tensor_pool.cpp:    spec.tensor->setData(nullptr);
+./tensor/tensor_pool.cpp:    << spec.tensor->getName();
+./tensor/tensor_pool.cpp:    dep_spec.tensor->setData(spec.tensor->getMemoryData(),
+./tensor/tensor_pool.cpp:                             spec.tensor->getOffset() + offset);
+./tensor/tensor_pool.cpp:  auto &name = spec.tensor->getName();
+./tensor/tensor_pool.cpp:  if (spec.tensor->empty())
+./tensor/tensor_pool.cpp:  name_map[name] = pool.size() - 1;
+./tensor/tensor_pool.cpp:  while (auto dep_details = std::get_if<DependentDetails>(&rs->details)) {
+./tensor/tensor_pool.cpp:    rs = &pool.at(dep_details->parent_idx);
+./tensor/tensor_pool.cpp:    << "Cannot set external tensor for non-zero lifespan for " << name;
+./tensor/tensor_pool.cpp:  NNTR_THROW_IF(t.size() != 0 && t.size() < spec.tensor->size(),
+./tensor/tensor_pool.cpp:    << spec.tensor->getName() << "(maybe view of " << name << ")";
+./tensor/tensor_pool.cpp:  spec.tensor->setData(t.getMemoryData(), t.getOffset());
+./tensor/tensor_pool.cpp:  NNTR_THROW_IF(dim != spec.tensor->getDim(), std::invalid_argument)
+./tensor/tensor_pool.cpp:    NNTR_THROW_IF(t->getDim() != dim, std::invalid_argument)
+./tensor/tensor_pool.cpp:    NNTR_THROW_IF(t->getInitializer() != init, std::invalid_argument)
+./tensor/tensor_pool.cpp:    pool->flush();
+./tensor/tensor_pool.cpp:    pool->flushExcept(order);
+./tensor/tensor_pool.cpp:    cache_loader->load(order);
+./tensor/tensor_pool.cpp:    return cache_loader->loadAsync(order, complete_callback);
+./tensor/tensor_pool.cpp:    return -1;
+./tensor/tensor_pool.cpp:  cache_loader->cancelAsync(id);
+./tensor/lazy_tensor.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/cache_pool.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/tensor.h: *   http://www.apache.org/licenses/LICENSE-2.0
+./tensor/tensor.h:      return getValue(b, c, h - ph, w - pw);
+./tensor/tensor.h:   * @details   This applies dot of the last dimension of this and second-last
+./tensor/tensor.h:   * @details   This applies dot of the last dimension of this and second-last
+./tensor/tensor.h:   * @note This will compute the derivative in-place and will overwrite existing
+./tensor/tensor.h:   * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate)
+./tensor/tensor.h:   * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate) inplace
+./tensor/tensor.h:   * with rate @a (1-zoneout).
+./tensor/tensor.h:   * with rate @a (1-zoneout).
+./tensor/tensor.h:   * @brief     Normalize the Tensor elements in-place
+./tensor/tensor.h:   * @brief     Standardize the Tensor elements in-place
+./tensor/tensor.h:  void setRandUniform(float min = -0.05f, float max = 0.05f);
+./tensor/tensor.h:   * @note      The memory for this tensor will re-allocated/re-assigned if the
+./tensor/tensor.h:   * @note      If this tensor is re-allocated, then the memory might not be
+./tensor/tensor.h:    data->validate();
+./tensor/tensor.h:    return (T *)((float *)data->getAddr() + offset);
+./tensor/tensor.h:    data->validate();
+./tensor/tensor.h:    return (T *)((float *)data->getAddr() + offset);
+./tensor/tensor.h:    data->invalidate();
+./tensor/tensor.h:   * @brief    get address of n-d data
+./tensor/tensor.h:   * @brief    get address of n-d data
+./tensor/tensor.h:   * @brief Get linear index given the n-d index
+./tensor/tensor.h:  static constexpr float epsilon = 1e-5;
+./tensor/tensor.h:                       int cur_axis = -1, size_t offset = 0,
+./tensor/var_grad.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/var_grad.h:  TensorDim getDim() const { return var->getDim(); }
+./tensor/var_grad.h:  const std::string &getName() const { return var->getName(); }
+./tensor/var_grad.h:  const std::string &getGradientName() const { return grad->getName(); }
+./tensor/var_grad.h:    grad->initialize();
+./tensor/var_grad.h:    if (!var->empty())
+./tensor/var_grad.h:      var->updateBatch(batch);
+./tensor/var_grad.h:    if (grad && !grad->empty())
+./tensor/var_grad.h:      grad->updateBatch(batch);
+./tensor/var_grad.h:    if (var->isAllocated())
+./tensor/var_grad.h:      return grad->isAllocated();
+./tensor/var_grad.h:    return !grad->empty();
+./tensor/var_grad.h:  float getGradientNorm() const { return grad->l2norm(); }
+./tensor/optimized_v2_planner.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/var_grad.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/swap_device.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/swap_device.cpp:  off = lseek(fd, size - 1, SEEK_SET);
+./tensor/swap_device.cpp:  int diff = offset - off;
+./tensor/swap_device.cpp:  NNTR_THROW_IF(ptr == (void *)-1, std::runtime_error)
+./tensor/swap_device.cpp:  NNTR_THROW_IF(ret == -1, std::runtime_error)
+./tensor/swap_device.cpp:  fd = -1;
+./tensor/basic_planner.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/memory_planner.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/lazy_tensor.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/lazy_tensor.cpp:    [value](Tensor &t) mutable -> int { return t.add_i(value); });
+./tensor/lazy_tensor.cpp:  auto f = [&m, alpha](Tensor &t) mutable -> int { return t.add_i(m, alpha); };
+./tensor/lazy_tensor.cpp:  auto f = [&m](Tensor &t) mutable -> int { return t.subtract_i(m); };
+./tensor/lazy_tensor.cpp:  auto f = [value](Tensor &t) mutable -> int { return t.subtract_i(value); };
+./tensor/lazy_tensor.cpp:  auto f = [value](Tensor &t) mutable -> int { return t.multiply_i(value); };
+./tensor/lazy_tensor.cpp:  auto f = [&m](Tensor &t) mutable -> int { return t.multiply_i(m); };
+./tensor/lazy_tensor.cpp:  auto f = [value](Tensor &t) mutable -> int { return t.divide_i(value); };
+./tensor/lazy_tensor.cpp:  auto f = [&m](Tensor &t) mutable -> int { return t.divide_i(m); };
+./tensor/lazy_tensor.cpp:  auto f = [&m](Tensor &t) mutable -> int {
+./tensor/lazy_tensor.cpp:  auto f = [direction](Tensor &t) mutable -> int {
+./tensor/lazy_tensor.cpp:  auto f = [](Tensor &t) mutable -> int {
+./tensor/lazy_tensor.cpp:  auto f = [axis](Tensor &t) mutable -> int {
+./tensor/lazy_tensor.cpp:  auto f = [axis](Tensor &t) mutable -> int {
+./tensor/lazy_tensor.cpp:  auto f = [](Tensor &t) mutable -> int {
+./tensor/weight.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/weight.h:    if (!this->var->empty())
+./tensor/weight.h:      w.var = std::make_shared<Tensor>(this->var->clone());
+./tensor/weight.h:    if (!this->grad->empty())
+./tensor/weight.h:      w.grad = std::make_shared<Tensor>(this->grad->clone());
+./tensor/weight.h:      return regularizer_constant * 0.5f * var->l2norm();
+./tensor/weight.h:      grad->add_i(*var.get(), regularizer_constant);
+./tensor/weight.h:  void applyGradient(double lr) { var->add_i(*grad.get(), -lr); }
+./tensor/weight.h:      grad->multiply_i(clip_by_global_norm / (global_norm + epsilon));
+./tensor/weight.h:  static constexpr float epsilon = 1e-6; /**< epsilon for zero comparison */
+./tensor/weight.h:    1e-8; /**< epsilon for zero comparison */
+./tensor/weight.h:  void applyWeightDecay() { grad->add_i(*var.get(), decay); }
+./tensor/swap_device.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/swap_device.h:    fd(-1) {}
+./tensor/swap_device.h:    fd(-1) {}
+./tensor/manager.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/manager.h:   * Will return -1 except for android
+./tensor/manager.h:   * @return -1 if fd is not allocated (or unabled to allocate)
+./tensor/manager.h:      in->setBatchSize(batch);
+./tensor/manager.h:      out->setBatchSize(batch);
+./tensor/blas_interface.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/memory_pool.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/memory_pool.h: * @todo   Support releaseMemory(token) - this need not release actual memory
+./tensor/tensor.cpp: *   http://www.apache.org/licenses/LICENSE-2.0
+./tensor/tensor.cpp:  BroadcastInfo() : buffer_size(0), buffer_axis(-1), strides{0, 0, 0, 0} {}
+./tensor/tensor.cpp:                                 -1 means no loop needed*/
+./tensor/tensor.cpp:    data = src_tensor->tensor()->data;
+./tensor/tensor.cpp:    offset = src_tensor->tensor()->offset + src_tensor->offset();
+./tensor/tensor.cpp:      delete[] mem_data->getAddr();
+./tensor/tensor.cpp:  if (this->dim != rhs.dim)
+./tensor/tensor.cpp:        std::fabs(data[i] - rdata[i]) > epsilon)
+./tensor/tensor.cpp:  } else if (dim.batch() * dim.channel() == 1) { /// fc layer - 2-D tensor
+./tensor/tensor.cpp:  } else { /// conv2d filters - 4d tensor, @todo extend this to > 4
+./tensor/tensor.cpp:    setRandUniform(-1.0f * sqrtFloat(1.0f / fan_in), sqrtFloat(1.0f / fan_in));
+./tensor/tensor.cpp:    setRandUniform(-1.0f * sqrtFloat(6.0f / (fan_in + fan_out)),
+./tensor/tensor.cpp:    setRandUniform(-1.0f * sqrtFloat(6.0f / (fan_in)),
+./tensor/tensor.cpp:    mem_data, [](auto *mem_data) { delete[] mem_data->getAddr(); });
+./tensor/tensor.cpp:          this->setValue(i, j, k, l, d[i][j][k][l]);
+./tensor/tensor.cpp:    this->multiply_strided(m, *this, beta);
+./tensor/tensor.cpp:  return this->multiply_strided(m, t, beta);
+./tensor/tensor.cpp:    this->add_strided(m, *this, beta);
+./tensor/tensor.cpp:  return this->add_strided(m, t, beta);
+./tensor/tensor.cpp:    this->multiply(m, *this, beta);
+./tensor/tensor.cpp:  return this->multiply(m, t, beta);
+./tensor/tensor.cpp:  this->divide(value, *this);
+./tensor/tensor.cpp:    this->divide(m, *this);
+./tensor/tensor.cpp:  return this->divide(m, t);
+./tensor/tensor.cpp:  this->add(value, *this);
+./tensor/tensor.cpp:  return this->add(m, t, alpha);
+./tensor/tensor.cpp:  this->subtract(value, *this);
+./tensor/tensor.cpp:int Tensor::subtract_i(Tensor const &m) { return add_i(m, -1); }
+./tensor/tensor.cpp:Tensor Tensor::subtract(Tensor const &m) const { return add(m, -1); }
+./tensor/tensor.cpp:  return add(m, out, -1);
+./tensor/tensor.cpp:  return getSharedDataTensor(dim_, offset * this->dim.getFeatureLen());
+./tensor/tensor.cpp:   * - If src already has data allocaed, then directly make dest tensor based on
+./tensor/tensor.cpp:   * - If src.data does not exist (meaning tensor does not memory allocated),
+./tensor/tensor.cpp:   * - If src.src_tensor exists, then use the src.src_tensor to create the
+./tensor/tensor.cpp:   * @note src.data and src.src_tensor CAN co-exist. src.src_tensor is stored
+./tensor/tensor.cpp:      src.src_tensor->tensor(), offset + src.src_tensor->offset());
+./tensor/tensor.cpp:  if (axis == -1) {
+./tensor/tensor.cpp:  if (axis == -1) {
+./tensor/tensor.cpp:                           TensorDim &reset_dim) -> float & {
+./tensor/tensor.cpp:    for (int i = 3; i >= 0; --i) {
+./tensor/tensor.cpp:        loc[i] -= reset_dim.getTensorDim(i);
+./tensor/tensor.cpp:  if (axis == -1) {
+./tensor/tensor.cpp:                       const TensorDim &ref_dim) -> float & {
+./tensor/tensor.cpp:    for (int i = 3; i >= 0; --i) {
+./tensor/tensor.cpp:      if (loc[i] - start_loc[i] == ref_dim.getTensorDim(i)) {
+./tensor/tensor.cpp:  return apply_broadcast_util(m, v_func, output, this->computeBroadcastInfo(m));
+./tensor/tensor.cpp:  const float *buf = this->getData();
+./tensor/tensor.cpp:    ret.copy(this->getData());
+./tensor/tensor.cpp:    this->sum(axes[0], output, alpha);
+./tensor/tensor.cpp:      if (axes[i] == axes[i - 1] + 1) {
+./tensor/tensor.cpp:        new_reshaped.mergeAxis(axes[i - 1], axes[i]);
+./tensor/tensor.cpp:    for (unsigned int i = 1; i < new_axes.size() - 1; ++i)
+./tensor/tensor.cpp:    const Tensor this_b = this->getBatchSlice(b, 1);
+./tensor/tensor.cpp: * computation. So, while performing, these matrices are behaving as 2-D
+./tensor/tensor.cpp:  // direction. It supposes to have this->dim as [ BxCxH,W ] and m.dim is
+./tensor/tensor.cpp:  int indexI = direction[0] - '0';
+./tensor/tensor.cpp:  int indexJ = direction[2] - '0';
+./tensor/tensor.cpp:  float scale = 1.0 / (1 - dropout);
+./tensor/tensor.cpp:  float en_mask_val = 1.0 - fill_mask_val;
+./tensor/tensor.cpp:    en_mask_val = 1.0 - fill_mask_val;
+./tensor/tensor.cpp:        << data[len - 3] << ' ' << data[len - 2] << ' ' << data[len - 1] << ']'
+./tensor/tensor.cpp:              << this->getValue(k, l, i, j) << " ";
+./tensor/tensor.cpp:    out << "-------" << std::endl;
+./tensor/tensor.cpp:  // todo: enable copy to non-contiguous tensor
+./tensor/tensor.cpp:    throw std::runtime_error("Cannot copy non-contiguous tensor");
+./tensor/tensor.cpp:  // todo: enable copy to non-contiguous tensor
+./tensor/tensor.cpp:    throw std::runtime_error("Cannot copy non-contiguous tensor");
+./tensor/tensor.cpp:  if (alloc && this->empty()) {
+./tensor/tensor.cpp:    this->copy(from);
+./tensor/tensor.cpp:      "[Tensor::fill] non-contiguous tensors are not supported");
+./tensor/tensor.cpp:  this->copy(from.getData());
+./tensor/tensor.cpp:    this->sum(axis, output, 1.0 / ((float)axis_size));
+./tensor/tensor.cpp:    return this->average(output);
+./tensor/tensor.cpp:  return this->sum(axes, output, 1.0 / (float)ret_shape.getDataLen());
+./tensor/tensor.cpp:    apply_i([](float val) -> float { return 0; });
+./tensor/tensor.cpp:    result[b] = std::distance(data, max_iter) - (b * feature_len);
+./tensor/tensor.cpp:    this->subtract_i(tmp);
+./tensor/tensor.cpp:    this->subtract_i(min);
+./tensor/tensor.cpp:    this->divide_i(max - min);
+./tensor/tensor.cpp:  Tensor mean_by_batch = this->sum_by_batch();
+./tensor/tensor.cpp:  this->subtract_i(mean_by_batch);
+./tensor/tensor.cpp:    Tensor sub_this = this->getBatchSlice(k, 1);
+./tensor/tensor.cpp:  this->divide_i(std_dev_by_batch);
+./tensor/tensor.cpp:  if (m.size() > this->size())
+./tensor/tensor.cpp:  e.buffer_axis = -1;
+./tensor/tensor.cpp:  for (int axis = 3; axis >= 0; --axis) {
+./tensor/tensor.cpp:    for (axis = 3; axis >= 0; --axis) {
+./tensor/tensor.cpp:    /// if consecutive-one strategy has bigger chunk size, replace the
+./tensor/memory_pool.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/memory_pool.cpp:  char *ptr = static_cast<char *>(mem_pool) + memory_offset.at(idx - 1);
+./tensor/weight.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/cache_elem.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/cache_loader.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/cache_loader.cpp:  task_executor = new TaskExecutor(pool->getName());
+./tensor/cache_loader.cpp:void CacheLoader::load(unsigned int order) { pool->loadExec(order); }
+./tensor/cache_loader.cpp:    pool->flushExcept({exe_order - 1, exe_order});
+./tensor/cache_loader.cpp:    pool->loadExec(exe_order);
+./tensor/cache_loader.cpp:  task->setTimeout(timeout_ms);
+./tensor/cache_loader.cpp:  return task_executor->run(task, complete);
+./tensor/cache_loader.cpp:    task_executor->cancel(id);
+./tensor/cache_loader.cpp:            pool->getName().c_str(), id, e.what());
+./tensor/manager.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/manager.cpp:  fd(-1),
+./tensor/manager.cpp:  int fd_ = -1;
+./tensor/manager.cpp:    if (fd_ != -1) {
+./tensor/manager.cpp:  if (fd != -1) {
+./tensor/manager.cpp:  fd = -1;
+./tensor/manager.cpp:      use_first_last = 1 - use_first_last;
+./tensor/manager.cpp:            shared_inout.getSharedDataTensor(io->getDim(), offset);
+./tensor/manager.cpp:            io->getDim(),
+./tensor/manager.cpp:            max_shared_inout - io->getDim().getDataLen() - offset);
+./tensor/manager.cpp:        offset += io->getDim().getDataLen();
+./tensor/manager.cpp:      io->initialize(shared_inout_cur, Tensor(), false);
+./tensor/manager.cpp:    use_first_last = 1 - use_first_last;
+./tensor/manager.cpp:    bool is_last_layer = idx == in_outs.size() - 1;
+./tensor/manager.cpp:          io->initialize(
+./tensor/manager.cpp:            Tensor(), shared_deriv.getSharedDataTensor(io->getDim(), offset));
+./tensor/manager.cpp:          offset += io->getDim().getDataLen();
+./tensor/manager.cpp:          io->initializeShared();
+./tensor/manager.cpp:        io->initialize(Tensor(), Tensor(), true);
+./tensor/manager.cpp:  return orders[orders.size() - 2];
+./tensor/task.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/optimized_v3_planner.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/tensor_wrap_specs.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/memory_data.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/cache_loader.h:// SPDX-License-Identifier: Apache-2.0
+./tensor/task_executor.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/task_executor.cpp:  auto work = task->getWork();
+./tensor/task_executor.cpp:  return work(running, task->getData());
+./tensor/#tensor_dim.cpp#:// SPDX-License-Identifier: Apache-2.0
+./tensor/#tensor_dim.cpp#:  int shift_size = MAXDIM - dims.size();
+./tensor/#tensor_dim.cpp#:    setTensorDim(MAXDIM - cur_dim + cn, std::stoul((*i).str()));
+./tensor/#tensor_dim.cpp#:  int dirs[MAXDIM - 1];
+./tensor/#tensor_dim.cpp#:  if (this->format != rhs.format)
+./tensor/#tensor_dim.cpp#:    if (this->dim[i] != rhs.dim[i]) {
+./tensor/#tensor_dim.cpp#:  auto get_axis = [dynamic, this](unsigned int axis) -> int {
+./tensor/#tensor_dim.cpp#:    if (dynamic && dyn_dim_flag[MAXDIM - axis - 1]) {
+./tensor/#tensor_dim.cpp#:      return -1;
+./tensor/#tensor_dim.cpp#:    /// ex) 3:5:1:1 -> 3:5, we are setting eff_dim_flag to 0b1100
+./tensor/#tensor_dim.cpp#:    if (eff_dim_flag[MAXDIM - i - 1]) {
+./tensor/cache_pool.cpp:// SPDX-License-Identifier: Apache-2.0
+./tensor/cache_pool.cpp:  NNTR_THROW_IF(swap_device->isOperating(), std::runtime_error)
+./tensor/cache_pool.cpp:  swap_device->start(pool_size);
+./tensor/cache_pool.cpp:  if (!swap_device->isOperating())
+./tensor/cache_pool.cpp:  swap_device->finish();
+./tensor/cache_pool.cpp:  if (!elems[id]->isActive()) {
+./tensor/cache_pool.cpp:    elems[id]->swapIn();
+./tensor/cache_pool.cpp:  if (elems[id]->isActive()) {
+./tensor/cache_pool.cpp:    elems[id]->swapOut();
+./tensor/cache_pool.cpp:  NNTR_THROW_IF(!swap_device->isOperating(), std::invalid_argument)
+./tensor/cache_pool.cpp:  off_t offset = getMemoryOffset().at(id - 1);
+./tensor/cache_pool.cpp:  size_t len = getMemorySize().at(id - 1);
+./tensor/cache_pool.cpp:  auto exe_order = getMemoryExecOrder().at(id - 1);
+./tensor/cache_pool.cpp:  auto policy = getCachePolicy().at(id - 1);
+./tensor/cache_pool.cpp:    elem->swapOut(CacheElem::LAST_ACCESS);
+./tensor/cache_pool.cpp:    elem->reset();
+./tensor/cache_pool.cpp:  actives.remove_if([&, order](auto elem) -> bool {
+./tensor/cache_pool.cpp:    auto id = elem->getId();
+./tensor/cache_pool.cpp:    auto exe_order = exe_orders.at(id - 1);
+./tensor/cache_pool.cpp:      elem->swapOut(opt);
+./tensor/cache_pool.cpp:  actives.remove_if([&, order](const auto elem) -> bool {
+./tensor/cache_pool.cpp:    auto id = elem->getId();
+./tensor/cache_pool.cpp:    auto exe_order = exe_orders.at(id - 1);
+./tensor/cache_pool.cpp:    elem->swapOut(opt);
+./tensor/cache_pool.cpp:bool CachePool::isAllocated() const { return swap_device->isOperating(); }
+./tensor/cache_pool.cpp:    auto exe_order = exe_orders.at(id - 1);
+./tensor/cache_pool.cpp:    elem->swapIn();
+./tensor/cache_pool.cpp:    elem->swapOut();
+./utils/nntr_threads.cpp:// SPDX-License-Identifier: Apache-2.0
+./utils/nntr_threads.cpp:  unsigned int chunk = (end - start + (num_workers - 1)) / num_workers;
+./utils/nntr_threads.cpp:    workers.push_back(std::thread(cb, s, e, i, user_data_prop->get()));
+./utils/node_exporter.h:// SPDX-License-Identifier: Apache-2.0
+./utils/node_exporter.h:          stored_result->emplace_back(std::move(key), to_string(prop));
+./utils/node_exporter.h:      if (istrequal(prop_key, iter->first) == true) {
+./utils/node_exporter.h:        from_string(iter->second, prop);
+./utils/tracer.h:// SPDX-License-Identifier: Apache-2.0
+./utils/tracer.h:  nntrainer::MemoryTracer::getInstance()->tracePoint(msg)
+./utils/tracer.h:  nntrainer::TimeTracer::getInstance()->tracePoint(msg)
+./utils/tracer.cpp:// SPDX-License-Identifier: Apache-2.0
+./utils/tracer.cpp:auto outputFileName = [](std::string name) -> std::string {
+./utils/tracer.cpp:              std::chrono::system_clock::now().time_since_epoch() - start)
+./utils/node_exporter.cpp:// SPDX-License-Identifier: Apache-2.0
+./utils/node_exporter.cpp:  tf_node->finalize();
+./utils/node_exporter.cpp:  tf_node->setLayerNode(*self);
+./utils/node_exporter.cpp:  tf_node->setOpType(tflite::BuiltinOperator_FULLY_CONNECTED);
+./utils/node_exporter.cpp:  tf_node->setBuiltinOptions(tflite::BuiltinOptions_FullyConnectedOptions,
+./utils/node_exporter.cpp:    tf_node->setOpType(tflite::BuiltinOperator_RELU);
+./utils/node_exporter.cpp:    tf_node->setBuiltinOptions(tflite::BuiltinOptions_NONE,
+./utils/node_exporter.cpp:    tf_node->setOpType(tflite::BuiltinOperator_SOFTMAX);
+./utils/node_exporter.cpp:    tf_node->setBuiltinOptions(tflite::BuiltinOptions_SoftmaxOptions, options);
+./utils/node_exporter.cpp:        .channel() /** value **/); // effective dimension = {bias->channel()}
+./utils/node_exporter.cpp:  tf_node->setWeightTransformFn(weight_transform);
+./utils/node_exporter.cpp:  tf_node->setOpType(tflite::BuiltinOperator_CONV_2D);
+./utils/node_exporter.cpp:  tf_node->setBuiltinOptions(tflite::BuiltinOptions_Conv2DOptions, options);
+./utils/node_exporter.cpp:  // input layer exports to Transpose operator (NCHW -> NHWC)
+./utils/node_exporter.cpp:  tf_node->setOpType(tflite::BuiltinOperator_TRANSPOSE);
+./utils/node_exporter.cpp:  tf_node->setBuiltinOptions(tflite::BuiltinOptions_TransposeOptions,
+./utils/node_exporter.cpp:  tf_node->setInputTransformFn(input_transform);
+./utils/node_exporter.cpp:  assert(tf_node->getOutputs().size() == 1);
+./utils/node_exporter.cpp:  auto output_tensor = const_cast<Tensor *>(tf_node->getOutputs()[0]);
+./utils/node_exporter.cpp:  output_tensor->allocate();
+./utils/node_exporter.cpp:    tf_node->setOpType(tflite::BuiltinOperator_AVERAGE_POOL_2D);
+./utils/node_exporter.cpp:    tf_node->setBuiltinOptions(tflite::BuiltinOptions_Pool2DOptions, options);
+./utils/node_exporter.cpp:  tf_node->setOpType(tflite::BuiltinOperator_RESHAPE);
+./utils/node_exporter.cpp:  auto new_shape = fbb->CreateVector(new_shape_vec);
+./utils/node_exporter.cpp:  tf_node->setBuiltinOptions(tflite::BuiltinOptions_ReshapeOptions, options);
+./utils/node_exporter.cpp:  tf_node->setOpType(tflite::BuiltinOperator_RESHAPE);
+./utils/node_exporter.cpp:  auto new_shape = fbb->CreateVector(new_shape_vec);
+./utils/node_exporter.cpp:  tf_node->setBuiltinOptions(tflite::BuiltinOptions_ReshapeOptions, options);
+./utils/node_exporter.cpp:  tf_node->setOpType(tflite::BuiltinOperator_ADD);
+./utils/node_exporter.cpp:  tf_node->setBuiltinOptions(tflite::BuiltinOptions_AddOptions, options);
+./utils/meson.build:if get_option('enable-trace')
+./utils/util_func.h: *   http://www.apache.org/licenses/LICENSE-2.0
+./utils/util_func.h: * @retval true if string is case-insensitive equal
+./utils/util_func.h: * @retval false if string is case-insensitive not equal
+./utils/base_properties.h:// SPDX-License-Identifier: Apache-2.0
+./utils/base_properties.h:  auto last_iter = property.end() - 1;
+./utils/base_properties.h:  auto last_iter = value.end() - 1;
+./utils/ini_wrapper.cpp:// SPDX-License-Identifier: Apache-2.0
+./utils/ini_wrapper.cpp:    this->section_name = section_name;
+./utils/ini_wrapper.cpp:    this->entry[it.first] = it.second;
+./utils/ini_wrapper.cpp:    if (cur[0] == '-') {
+./utils/profiler.cpp:// SPDX-License-Identifier: Apache-2.0
+./utils/profiler.cpp:  auto &cnt_ = std::get<GenericProfileListener::CNT>(time_iter->second);
+./utils/profiler.cpp:  auto &cur_ = std::get<GenericProfileListener::CUR>(time_iter->second);
+./utils/profiler.cpp:  auto &min_ = std::get<GenericProfileListener::MIN>(time_iter->second);
+./utils/profiler.cpp:  auto &max_ = std::get<GenericProfileListener::MAX>(time_iter->second);
+./utils/profiler.cpp:  auto &sum_ = std::get<GenericProfileListener::SUM>(time_iter->second);
+./utils/profiler.cpp:    onNotifyTimeEvent(event, data->time_item, data->event_str, data->duration);
+./utils/profiler.cpp:    onNotifyMemoryEvent(event, data->alloc_current, data->alloc_total,
+./utils/profiler.cpp:                        data->event_str, data->duration, data->cache_policy,
+./utils/profiler.cpp:                        data->cache_swap);
+./utils/profiler.cpp:      std::get<GenericProfileListener::CNT>(iter->second) == 0) {
+./utils/profiler.cpp:  return std::get<GenericProfileListener::CUR>(iter->second);
+./utils/profiler.cpp:      std::max(column_size[0], static_cast<unsigned int>(title->second.size()));
+./utils/profiler.cpp:    std::chrono::duration_cast<std::chrono::microseconds>(end - start_time);
+./utils/profiler.cpp:  // clang-format off
+./utils/profiler.cpp:  // clang-format on
+./utils/profiler.cpp:        out_ << std::left << std::setw(total_col_size) << title->second
+./utils/profiler.cpp:      // clang-format off
+./utils/profiler.cpp:      out_ << std::setw(column_size[0]) << title->second
+./utils/profiler.cpp:          << std::setw(column_size[1]) << sum_.count() / (cnt_ - warmups)
+./utils/profiler.cpp:      // clang-format on
+./utils/profiler.cpp:    ordered_report[-time.first] = func;
+./utils/profiler.cpp:  // clang-format off
+./utils/profiler.cpp:  // clang-format on
+./utils/profiler.cpp:      // clang-format off
+./utils/profiler.cpp:      // clang-format on
+./utils/profiler.cpp:  auto data = std::make_shared<ProfileEventData>(item, 0, 0, name->second,
+./utils/profiler.cpp:    std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+./utils/profiler.cpp:    std::make_shared<ProfileEventData>(item, 0, 0, name->second, duration);
+./utils/profiler.cpp:  for (auto &l : time_item_listeners[data->time_item])
+./utils/profiler.cpp:    l->notify(event, data);
+./utils/profiler.cpp:    l->notify(event, data);
+./utils/profiler.cpp:    end - std::get<timepoint>(found->second));
+./utils/profiler.cpp:  auto size = std::get<size_t>(found->second);
+./utils/profiler.cpp:  total_size -= size;
+./utils/profiler.cpp:  auto str = std::get<std::string>(found->second);
+./utils/nntr_threads.h:// SPDX-License-Identifier: Apache-2.0
+./utils/profiler.h:// SPDX-License-Identifier: Apache-2.0
+./utils/profiler.h:   * @brief trace memory de-allocation
+./utils/profiler.h:   * @param ptr de-allocated memory pointer
+./utils/ini_wrapper.h:// SPDX-License-Identifier: Apache-2.0
+./utils/ini_wrapper.h:   * -epochs = 1" will delete epochs, and overwrite type and decayrate
+./utils/util_func.cpp: *   http://www.apache.org/licenses/LICENSE-2.0
+./utils/util_func.cpp:static std::uniform_real_distribution<float> dist(-0.5, 0.5);
+./utils/util_func.cpp:float logFloat(float x) { return log(x + 1.0e-20); }
+./utils/util_func.cpp:            in.getValue(i, j, (in.height() - k - 1), (in.width() - l - 1)));
+./utils/util_func.cpp:  size_t spos = target.size() - suffix.size();
+./utils/util_func.cpp:                                                        reg, -1);
+./utils/base_properties.cpp:// SPDX-License-Identifier: Apache-2.0
+./utils/base_properties.cpp:    target.setTensorDim(cur_axis--, std::stoul(*iter));