There are configuration bugs in the FP32-only case.
This PR fixes the configuration and some of the ENABLE_FP16 compiler
macro errors.
Resolves:
**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped
Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
nntrainer::Tensor in_tensor;
nntrainer::sharedConstTensor test;
try {
- in_tensor = nntrainer::Tensor({input});
+ in_tensor = nntrainer::Tensor(
+ {input}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16});
} catch (...) {
std::cerr << "Error while construct tensor" << std::endl;
return 0;
nntrainer::Tensor q_in, nq_in;
try {
- q_in = nntrainer::Tensor(inbatch);
- nq_in = nntrainer::Tensor(next_inbatch);
+ q_in = nntrainer::Tensor(
+ inbatch, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16});
+ nq_in = nntrainer::Tensor(next_inbatch, {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16});
} catch (...) {
std::cerr << "Error during tensor constructino" << std::endl;
return 0;
}
nntrainer::Tensor in_tensor;
try {
- in_tensor = nntrainer::Tensor(inbatch);
+ in_tensor = nntrainer::Tensor(
+ inbatch, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16});
mainNet.forwarding({MAKE_SHARED_TENSOR(in_tensor)}, {Q});
mainNet.backwarding(iter);
} catch (...) {
nntrainer::Tensor X;
try {
- X = nntrainer::Tensor({featureVector});
- NN.forwarding({MAKE_SHARED_TENSOR(X)})[0]->apply(stepFunction);
+ X = nntrainer::Tensor({featureVector}, {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16});
+ NN.forwarding({MAKE_SHARED_TENSOR(X)})[0]->apply<float>(stepFunction);
} catch (...) {
std::cerr << "Error while forwarding the model" << std::endl;
return 1;
#include <bitset>
#include <vector>
+#ifdef ENABLE_FP16
#ifdef USE__FP16
-#define _FP16 __fp16
+#define _FP16 __fp16
#else
-#define _FP16 _Float16
+#define _FP16 _Float16
+#endif
#endif
namespace ml {
public:
static constexpr const size_t MAXDIM = 4;
+ /**
+ * @brief Tensor Format. Channel Last or Channel First
+ *
+ */
enum class Format { NCHW, NHWC };
+ /**
+ * @brief Tensor Data Type. Currently FP16 & FP32 are supported
+ *
+ */
enum class DataType {
FP16, /** half precion */
FP32 /** single precision */
};
+ /**
+ * @brief Tensor Type: a context that holds the Format & DataType
+ *
+ */
struct TensorType {
/**
* @brief Tensor Formant : Default is NCHW
*/
Format format;
+
+ /**
+ * @brief Tensor Data Type : Default is FP32
+ */
DataType data_type;
- TensorType() : format(Format::NCHW), data_type(DataType::FP32) {};
+ /**
+ * @brief Default creator of Tensor Type
+ */
+ TensorType() : format(Format::NCHW), data_type(DataType::FP32){};
- TensorType(Format fm, DataType d_type) : format(fm), data_type(d_type) {};
+ /**
+ * @brief Creator of Tensor Type with an explicit Format & DataType
+ */
+ TensorType(Format fm, DataType d_type) : format(fm), data_type(d_type){};
};
/**
*/
static unsigned int getNumDim();
+ /**
+ * @brief Creator of TensorDim with Format & DataType
+ *
+ * @param fm format NCHW | NHWC
+ * @param d_type DataType FP16 | FP32
+ * @param eff_dim_flag_ effective dimension flag (1 means it's effective)
+ * @param dyn_dim_flag_ dynamic dimension flag (1 means it's unspecified)
+ */
TensorDim(TensorDim::Format fm, TensorDim::DataType d_type,
- const std::bitset<MAXDIM> &eff_dim_flag_ = 0b1111,
- const std::bitset<MAXDIM> &dyn_dim_flag_ = 0b0000);
+ const std::bitset<MAXDIM> &eff_dim_flag_ = 0b1111,
+ const std::bitset<MAXDIM> &dyn_dim_flag_ = 0b0000);
/**
* @brief Construct a new Tensor Dim object
* @param eff_dim_flag_ effective dimension flag (1 means it's effective)
* @param dyn_dim_flag_ dynamic dimension flag (1 means it's unspecified)
*/
- explicit TensorDim(TensorType t_type_=TensorType(),
+ explicit TensorDim(TensorType t_type_ = TensorType(),
const std::bitset<MAXDIM> &eff_dim_flag_ = 0b1111,
const std::bitset<MAXDIM> &dyn_dim_flag_ = 0b0000);
TensorDim(std::initializer_list<size_t> dims,
TensorType t_type_ = TensorType());
- // TensorDim(std::initializer_list<size_t> dims, TensorDim::Format fm=Format::NCHW,
+ // TensorDim(std::initializer_list<size_t> dims, TensorDim::Format
+ // fm=Format::NCHW,
// TensorDim::DataType d_type=DataType::FP32);
/**
* @param shapes shapes without batch dimension
* @param fm format NCHW | HNWC
*/
- TensorDim(const std::array<size_t, 3> &shapes, TensorType t_type_ = TensorType());
+ TensorDim(const std::array<size_t, 3> &shapes,
+ TensorType t_type_ = TensorType());
- // TensorDim(const std::array<size_t, 3> &shapes, TensorDim::Format fm = Format::NCHW,
+ // TensorDim(const std::array<size_t, 3> &shapes, TensorDim::Format fm =
+ // Format::NCHW,
// TensorDim::DataType d_type=DataType::FP32);
/**
const std::bitset<MAXDIM> &eff_dim_flag_ = 0b1111,
const std::bitset<MAXDIM> &dyn_dim_flag_ = 0b0000);
+ /**
+ * @brief Construct a new Tensor Dim object
+ *
+ * @param d0 batch
+ * @param d1 channel
+ * @param d2 height
+ * @param d3 width
+ * @param fm format NCHW | NHWC
+ * @param d_type Data Type FP16 | FP32
+ * @param eff_dim_flag_ dimension bit flag to calculate the dynamic
+ * dimension, rightmost is width
+ */
TensorDim(size_t d0, size_t d1, size_t d2, size_t d3, TensorDim::Format fm,
TensorDim::DataType d_type,
const std::bitset<MAXDIM> &eff_dim_flag_ = 0b1111,
*/
TensorDim(const std::string &shape, TensorType t_type_ = TensorType());
- TensorDim(const std::string &shape,
- TensorDim::Format fm,
+ /**
+ * @brief Construct a new Tensor Dim object
+ *
+ * @param shape shape of format
+ * @param fm format NCHW | NHWC
+ * @param d_type data type FP16 | FP32
+ */
+ TensorDim(const std::string &shape, TensorDim::Format fm,
TensorDim::DataType d_type = TensorDim::DataType::FP32);
/**
/**
* @brief get data type size
*/
- uint getDataTypeSize() const ;
+ uint getDataTypeSize() const;
/**
* @brief Set the Dim Flag to retrieve effective dimension
* @param fm NCHW | NHWC
* @return int ML_ERROR_NONE if successs
*/
- int setTensorDim(const std::string &input_shape, TensorType t_type_=TensorType());
+ int setTensorDim(const std::string &input_shape,
+ TensorType t_type_ = TensorType());
// int setTensorDim(const std::string &input_shape, TensorDim::Format fm,
// TensorDim::DataType d_type);
* @brief getType
*
*/
- TensorDim::DataType getDataType() const { return t_type.data_type; };
+ TensorDim::DataType getDataType() const { return t_type.data_type; };
/**
* @brief setFormat
Section: libs
Priority: optional
Maintainer: Jijoong Moon <jijoong.moon@samsung.com>
-Build-Depends: gcc-9 | gcc-8 | gcc-7 (>=7.5),
+Build-Depends: gcc-13 | gcc-12 | gcc-11 | gcc-10 | gcc-9 | gcc-8 | gcc-7 (>=7.5),
python3, python3-numpy,
pkg-config, cmake, ninja-build, meson (>=0.50), debhelper (>=9),
libopenblas-dev, libiniparser-dev (>=4.1), tensorflow2-lite-dev, libjsoncpp-dev,
endif # MESON_HAS_TFLITE
-# include $(CLEAR_VARS)
+include $(CLEAR_VARS)
-# LOCAL_MODULE := openblas
+LOCAL_MODULE := openblas
-# LOCAL_SRC_FILES := @MESON_BLAS_ROOT@/lib/libopenblas.a
-# LOCAL_EXPORT_C_INCLUDES := @MESON_BLAS_ROOT@/include
-# LOCAL_EXPORT_CFLAGS += -DUSE_BLAS=1
+LOCAL_SRC_FILES := @MESON_BLAS_ROOT@/lib/libopenblas.a
+LOCAL_EXPORT_C_INCLUDES := @MESON_BLAS_ROOT@/include
+LOCAL_EXPORT_CFLAGS += -DUSE_BLAS=1
-# include $(PREBUILT_STATIC_LIBRARY)
+include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_LDLIBS := -llog -landroid -fopenmp -static-openmp
-LOCAL_STATIC_LIBRARIES += iniparser #openblas
+LOCAL_STATIC_LIBRARIES += iniparser openblas
ifeq ($(MESON_HAS_TFLITE), 1)
LOCAL_STATIC_LIBRARIES += tensorflow-lite
and_conf.set('MESON_HAS_TFLITE', 0)
endif
-# if blas_dep.found()
-# and_conf.set('MESON_BLAS_ROOT', blas_root)
-# else
-# error('blas is needed for the android build')
-# endif
+if blas_dep.found()
+ and_conf.set('MESON_BLAS_ROOT', blas_root)
+else
+ error('blas is needed for the android build')
+endif
if ml_api_common_dep.found()
and_conf.set('MESON_ML_API_COMMON_ROOT', ml_api_common_root)
]
-# arm_fp16_flags = [
-# '-mfp16-format=alternative',
-# '-mfpu=neon-fp16,
-# '-mfloat-abi=softfp'
-# ]
-
-# if get_option('enable-fp16')
-# foreach extra_arg : arm_fp16_flags
-# if cc.has_argument (extra_arg)
-# add_project_arguments([extra_arg], language: 'c')
-# endif
-# if cxx.has_argument (extra_arg)
-# add_project_arguments([extra_arg], language: 'cpp')
-# endif
-# endforeach
-# enfif
-
if get_option('enable-fp16')
arch = target_machine.cpu_family()
- extra_defines += '-DENABLE_FP16=1'
-
if get_option('platform') == 'android'
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
+ extra_defines += '-DENABLE_FP16=1'
extra_defines += '-DUSE__FP16=1'
elif arch == 'aarch64' or arch =='arm'
+ extra_defines += '-DENABLE_FP16=1'
extra_defines += '-DUSE__FP16=1'
else
has_avx512fp16 = cc.has_argument('-mavx512fp16')
if (has_avx512fp16)
# add_project_arguments(['-mavx512fp16'], language: ['c','cpp'])
message ('Float16 for x86_64 enabled. Modern gcc-x64 genrally supports float16 with _Float16. -mavx512fp16 added for hardware acceleration')
+ extra_defines += '-DENABLE_FP16=1'
else
warning ('Float16 for x86_64 enabled. However, software emulation is applied for fp16, making it slower and inconsistent. Use GCC 12+ for AVX512 FP16 support. This build will probably fail unless you bring a compiler that supports fp16 for x64.')
endif
extra_defines += '-DNNTRAINER_CONF_PATH="@0@"'.format(nntrainer_conf_abs_path)
endif
-# if get_option('enable-fp16')
-# extra_defines += '-march=armv8.2-a+fp16 -mfpu=neon-fp16 -mfloat-abi=softfp'
-# endif
-
message('extra defines are:' + ' '.join(extra_defines))
foreach defs: extra_defines
add_project_arguments(defs, language: ['c', 'cpp'])
warning('android app is not supported for now, building app skipped')
else
# this is needed for reinforcement application. We can move this to reinforecement app dependency
- # jsoncpp_dep = dependency('jsoncpp') # jsoncpp
- # libcurl_dep = dependency('libcurl')
- # if not tflite_dep.found()
- # error('Tensorflow-Lite dependency not found')
- # endif
+ jsoncpp_dep = dependency('jsoncpp') # jsoncpp
+ libcurl_dep = dependency('libcurl')
+ if not tflite_dep.found()
+ error('Tensorflow-Lite dependency not found')
+ endif
subdir('Applications')
endif
endif
-# if get_option('platform') != 'android'
-# nnstreamer_dep = dependency('nnstreamer')
-# message('building nnstreamer')
-# subdir('nnstreamer')
-# else
-# warning('android nnstreamer-filter and nnstreamer-trainer are not yet supported, building them is skipped')
-# endif
+if get_option('platform') != 'android'
+ nnstreamer_dep = dependency('nnstreamer')
+ message('building nnstreamer')
+ subdir('nnstreamer')
+else
+ warning('android nnstreamer-filter and nnstreamer-trainer are not yet supported, building them is skipped')
+endif
if get_option('platform') == 'android'
subdir('jni')
option('enable-logging', type: 'boolean', value: true)
option('enable-tizen-feature-check', type: 'boolean', value: true)
option('enable-nnstreamer-backbone', type: 'boolean', value: false)
-option('enable-tflite-backbone', type: 'boolean', value: false)
+option('enable-tflite-backbone', type: 'boolean', value: true)
option('enable-profile', type: 'boolean', value: false)
option('enable-trace', type: 'boolean', value: false)
option('enable-debug', type: 'boolean', value: false)
-option('enable-tflite-interpreter', type: 'boolean', value: false)
+option('enable-tflite-interpreter', type: 'boolean', value: true)
option('enable-memory-swap', type: 'boolean', value: false)
option('memory-swap-path', type: 'string', value: '')
option('test-timeout', type: 'integer', value: 60)
option('enable-long-test', type: 'boolean', value: false)
# backend options
-option('enable-blas', type: 'boolean', value: false)
-option('enable-fp16', type: 'boolean', value: true)
+option('enable-blas', type: 'boolean', value: true)
+option('enable-fp16', type: 'boolean', value: false)
option('enable-cublas', type: 'boolean', value: false)
option('enable-openmp', type: 'boolean', value: true)
if (data_type == ml::train::TensorDim::DataType::FP32) {
sm.setActiFunc<float>(ActivationType::ACT_SOFTMAX);
} else if (data_type == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
sm.setActiFunc<_FP16>(ActivationType::ACT_SOFTMAX);
+#else
+ throw std::runtime_error("enable-fp16 is not enabled");
+#endif
}
}
for (auto const &val : vec) {
if (val->getVariableRef().getTensorType().data_type ==
TensorDim::DataType::FP32) {
- tensor_map[val->getName()] = val->getVariableRef().getData<float>();
- tensor_map[val->getGradientName()] =
- val->getGradientRef().getData<float>();
+ tensor_map[val->getName()] = val->getVariableRef().getData();
+ tensor_map[val->getGradientName()] = val->getGradientRef().getData();
} else if (val->getVariableRef().getTensorType().data_type ==
- TensorDim::DataType::FP32) {
- tensor_map[val->getName()] =
- val->getVariableRef().getData<_FP16>();
+ TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+ tensor_map[val->getName()] = val->getVariableRef().getData<_FP16>();
tensor_map[val->getGradientName()] =
val->getGradientRef().getData<_FP16>();
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
};
LossLayer::updateLoss(context, l);
}
} else if (dataType == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
// update the loss value
LossLayer::updateLoss(context, l);
}
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
if (dataType == ml::train::TensorDim::DataType::FP32) {
y.apply(ActiFunc::softmax<float>, ret);
} else if (dataType == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
y.apply(ActiFunc::softmax<_FP16>, ret);
+#else
+ throw std::runtime_error("enable-fp16 is not enabled");
+#endif
}
/// @note y and ret_derivative can be same here, so this has to be out-place
+++ /dev/null
-/**
- * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- *
- * @file neuralnet.cpp
- * @date 04 December 2019
- * @brief This is Neural Network Class
- * @see https://github.com/nnstreamer/nntrainer
- * @author Jijoong Moon <jijoong.moon@samsung.com>
- * @bug No known bugs except for NYI items
- *
- */
-
-#include "layer_context.h"
-#include "model_common_properties.h"
-#include <cmath>
-#include <cstring>
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <sstream>
-
-#include <activation_realizer.h>
-#include <common_properties.h>
-#include <databuffer.h>
-#include <flatten_realizer.h>
-#include <ini_interpreter.h>
-#include <ini_wrapper.h>
-#include <input_realizer.h>
-#include <model_loader.h>
-#include <multiout_realizer.h>
-#include <neuralnet.h>
-#include <nntrainer_error.h>
-#include <nntrainer_log.h>
-#include <node_exporter.h>
-#include <optimizer_context.h>
-#include <previous_input_realizer.h>
-#include <profiler.h>
-#include <recurrent_realizer.h>
-#include <remap_realizer.h>
-#include <slice_realizer.h>
-#include <util_func.h>
-
-#ifdef ENABLE_TFLITE_INTERPRETER
-#include <tflite_interpreter.h>
-#endif
-
-/**
- * @brief Internal enum values for nntrainer to summarize model accuracy & loss
- */
-#define ML_TRAIN_SUMMARY_MODEL_TRAIN_LOSS 101
-#define ML_TRAIN_SUMMARY_MODEL_VALID_LOSS 102
-#define ML_TRAIN_SUMMARY_MODEL_VALID_ACCURACY 103
-
-namespace nntrainer {
-
-NeuralNetwork::NeuralNetwork() :
- model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm()),
- model_flex_props(
- props::Epochs(), props::TrainingBatchSize(), props::SavePath(),
- props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(),
- props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(),
- props::TensorFormat(), props::ModelTensorDataType()),
- load_path(std::string()),
- epoch_idx(0),
- iter(0),
- loss(0.0f),
- data_buffers({nullptr, nullptr, nullptr}),
- initialized(false),
- compiled(false),
- loadedFromConfig(false) {
- app_context = AppContext(AppContext::Global());
-
-}
-
-NeuralNetwork::NeuralNetwork(AppContext app_context_) :
- model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm()),
- model_flex_props(
- props::Epochs(), props::TrainingBatchSize(), props::SavePath(),
- props::ContinueTrain(), props::SaveBestPath(), props::MemoryOptimization(),
- props::MemorySwap(), props::MemorySwapPath(), props::MemorySwapLookahead(),
- props::TensorFormat(), props::ModelTensorDataType()),
- load_path(std::string()),
- epoch_idx(0),
- iter(0),
- loss(0.0f),
- data_buffers({nullptr, nullptr, nullptr}),
- initialized(false),
- compiled(false),
- loadedFromConfig(false),
- app_context(app_context_) {}
-
-int NeuralNetwork::loadFromConfig(const std::string &config) {
- if (loadedFromConfig == true) {
- ml_loge("cannnot do loadFromConfig twice");
- return ML_ERROR_INVALID_PARAMETER;
- }
-
- ModelLoader loader(app_context);
- NeuralNetwork tempNet(*this);
-
- int status = loader.loadFromContext(tempNet);
- if (status != ML_ERROR_NONE) {
- return status;
- }
-
- status = loader.loadFromConfig(config, tempNet);
- if (status != ML_ERROR_NONE) {
- return status;
- }
-
- tempNet.loadedFromConfig = true;
- swap(tempNet, *this);
-
- return ML_ERROR_NONE;
-}
-
-unsigned int NeuralNetwork::getCurrentEpoch() {
-#ifdef DEBUG
- ml_logd("[NNTrainer] Current epoch: %d", epoch_idx);
-#endif
- return epoch_idx;
-};
-
-void NeuralNetwork::setProperty(const std::vector<std::string> &values) {
- auto left_props = loadProperties(values, model_props);
- setTrainConfig(left_props);
-}
-
-void NeuralNetwork::setTrainConfig(const std::vector<std::string> &values) {
- auto left_props = loadProperties(values, model_flex_props);
- NNTR_THROW_IF(left_props.size(), std::invalid_argument)
- << "Model has unparsed properties, size: " << left_props.size()
- << " of first element: " << left_props.front();
-}
-
-int NeuralNetwork::compile() {
- std::string loss_type = std::get<props::LossType>(model_props).empty()
- ? std::string()
- : std::get<props::LossType>(model_props);
-
- auto &input_conn = std::get<std::vector<props::InputConnection>>(model_props);
- /// @note label layer might need to be treated in the similar way as well
-
- /// @todo make NetworkGraph compiled at the construction instead of having
- /// graph.compile(), neuralnetwork have ownership of list of layer nodes,
- /// which will be passed at compile time.
-
- std::vector<std::unique_ptr<GraphRealizer>> realizers;
-
- realizers.emplace_back(new PreviousInputRealizer(
- std::vector<Connection>(input_conn.begin(), input_conn.end())));
- realizers.emplace_back(new MultioutRealizer());
- realizers.emplace_back(new FlattenRealizer());
- realizers.emplace_back(new ActivationRealizer());
-
- for (auto &realizer : realizers) {
- graph_representation = realizer->realize(graph_representation);
- }
-
- bool memory_swap = std::get<props::MemorySwap>(model_flex_props);
- const std::string memory_swap_path =
- std::get<props::MemorySwapPath>(model_flex_props);
- unsigned int lookahead =
- std::get<props::MemorySwapLookahead>(model_flex_props);
-
- const std::string tensor_format =
- to_string(std::get<props::TensorFormat>(model_flex_props));
-
- const std::string tensor_type =
- to_string(std::get<props::ModelTensorDataType>(model_flex_props));
-
- model_graph = NetworkGraph(memory_swap, memory_swap_path, lookahead,
- tensor_format, tensor_type);
-
- model_graph.setMemoryOptimizations(
- std::get<props::MemoryOptimization>(model_flex_props));
- for (auto &node : graph_representation) {
- if (auto &prop = std::get<props::ClipGradByGlobalNorm>(model_props);
- !prop.empty()) {
- node->setProperty({"clip_grad_by_norm=" + to_string(prop)});
- }
- model_graph.addLayer(node);
- }
-
- int status = model_graph.compile(loss_type);
- NN_RETURN_STATUS();
-
- compiled = true;
-
- return status;
-}
-
-int NeuralNetwork::initialize() {
- int status = ML_ERROR_NONE;
-
- if (initialized) {
- ml_loge("Error: Initializing the model again");
- return ML_ERROR_NOT_SUPPORTED;
- }
-
- if (!compiled) {
- ml_loge("Error: Need to compile first");
- return ML_ERROR_NOT_SUPPORTED;
- }
-
- unsigned int n_layers = (unsigned int)model_graph.size();
-
- ml_logd("initializing neural network, layer size: %d", n_layers);
- PROFILE_MEM_ANNOTATE("Initialize");
-
- auto &input_conn_prop =
- std::get<std::vector<props::InputConnection>>(model_props);
- auto &label_layer_prop =
- std::get<std::vector<props::LabelLayer>>(model_props);
-
- std::vector<Connection> input_conn(input_conn_prop.begin(),
- input_conn_prop.end());
- std::vector<std::string> label_layers;
-
- if (!label_layer_prop.empty()) {
- label_layers = std::vector<std::string>(label_layer_prop.begin(),
- label_layer_prop.end());
- }
-
- status = model_graph.initialize(
- input_conn,
- std::vector<Connection>(label_layers.begin(), label_layers.end()));
- NN_RETURN_STATUS();
-
- model_graph.setBatchSize(
- std::get<props::TrainingBatchSize>(model_flex_props));
-
- // initialize optimizer and related variables
- /// @todo: initialize should take a mode and check if mode is train but
- /// optimizer is not given, make it as a hard error
- if (opt) {
- /** TODO: update request of optimizer to be of same format as
- * Layer::requestTensor */
- opt->finalize();
- std::function<std::vector<TensorDim>(const TensorDim &)> cb =
- [this](const TensorDim &dim) {
- return opt->getOptimizerVariableDim(dim);
- };
- model_graph.requestOptimizerVariable(cb, true);
- }
-
- // Allocate weights
- model_graph.allocateWeights();
-
- initialized = true;
-
- if (!load_path.empty()) {
- load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
- }
-
- return status;
-}
-
-/**
- * @brief free layers
- */
-NeuralNetwork::~NeuralNetwork() { deallocate(); }
-
-/**
- * @brief forward propagation using layers object which has layer
- */
-sharedConstTensors NeuralNetwork::forwarding(
- bool training, std::function<bool(void *userdata)> stop_cb, void *userdata) {
- std::function<void(std::shared_ptr<LayerNode>, bool)> forwarding_op =
- [this, stop_cb, userdata](std::shared_ptr<LayerNode> node,
- bool training) -> void {
- (void)this;
- PROFILE_MEM_ANNOTATE("Forwarding for layer: " + node->getName());
-
- auto f = std::get<0>(node->getExecutionOrder());
- model_graph.flushCacheExcept(f);
-
- node->forwarding(training);
- };
-
- return model_graph.forwarding(training, forwarding_op, stop_cb, userdata);
-}
-
-/**
- * @brief forward propagation using layers object which has layer
- */
-sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input,
- sharedConstTensors label,
- bool training) {
- auto current_batch = model_graph.getBatchSize();
- NNTR_THROW_IF(input[0]->batch() != current_batch ||
- (!label.empty() && label[0]->batch() != current_batch),
- std::logic_error)
- << "Error: mismatch in batchsize for data and model."
- << " input_batch: " << input[0]->batch()
- << " label_batch: " << label[0]->batch()
- << " target_batch: " << current_batch;
-
- model_graph.setInputsLabels(input, label);
-
- return forwarding(training);
-}
-
-/**
- * @brief back propagation
- * Call backwarding function of layer in reverse order
- * No need to call at first Input Layer (No data to be updated)
- */
-void NeuralNetwork::backwarding(int iteration,
- std::function<bool(void *userdata)> stop_cb,
- void *userdata) {
-
-#ifdef DEBUG
- NNTR_THROW_IF(!opt, std::invalid_argument) << "optimizer is null!";
-#endif
-
- std::function<void(std::shared_ptr<LayerNode>, int)> backwarding_op =
- [this, stop_cb, userdata](std::shared_ptr<LayerNode> node,
- int iteration) -> void {
- /**
- * Do not change this order:
- * 1. calcGradient
- * 2. calcDerivative
- * 3. applyGradient
- * 4. gradientClippingOnLastAccess
- */
-
- model_graph.flushCacheExcept(std::get<1>(node->getExecutionOrder()));
- PROFILE_MEM_ANNOTATE("CalcGradient: " + node->getName());
-
- bool apply_gradient = true;
- if (node->getTrainable()) {
- /** If gradient optimization mode, then calculate gradient first */
- if (dynamic_training_opt.isGradientMode())
- node->calcGradient();
-
- /**
- * If optimization off, or gradient must be applied, then this will be
- * true
- * @todo This apply gradient should be passed to the each weight and later
- * be queried when updating gradient at once. (after moving apply_gradient
- * out of this function)
- *
- */
- // auto &layer = node->getObject();
- // apply_gradient = dynamic_training_opt.checkIfApply(
- // layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0],
- // opt, iteration);
-
- /** If gradient must be applied and its not gradient mode, calculate
- * gradient
- */
- if (!dynamic_training_opt.isGradientMode() && apply_gradient)
- node->calcGradient();
- }
-
- model_graph.flushCacheExcept(std::get<2>(node->getExecutionOrder()));
- PROFILE_MEM_ANNOTATE("CalcDerivative: " + node->getName());
-
- if (stop_cb(userdata)) {
- return;
- }
-
- if (node->needsCalcDerivative())
- node->calcDerivative();
-
- model_graph.flushCacheExcept(std::get<3>(node->getExecutionOrder()));
- PROFILE_MEM_ANNOTATE("ApplyGradient: " + node->getName());
-
- if (apply_gradient) {
- /// Apply gradient only at the end of the last shared weight access
- model_graph.applyGradients(
- node.get(), [iteration, opt_ = opt.get()](Weight &w) {
- w.calcRegularizationGradient();
- w.calcWeightDecayGradient();
- RunOptimizerContext opt_context(&w, iteration,
- opt_->getLearningRate(iteration));
- opt_->applyGradient(opt_context);
- });
- }
- };
-
- std::function<void(Weight &, int)> apply_grad_clip_op =
- [opt_ = opt.get()](Weight &w, int iteration) -> void {
- w.calcRegularizationGradient();
- w.calcWeightDecayGradient();
- RunOptimizerContext opt_context(&w, iteration,
- opt_->getLearningRate(iteration));
- opt_->applyGradient(opt_context);
- };
-
- model_graph.backwarding(iteration, backwarding_op, apply_grad_clip_op,
- stop_cb, userdata);
-}
-
-void NeuralNetwork::save(const std::string &file_path,
- ml::train::ModelFormat format) {
- NNTR_THROW_IF(!initialized, std::runtime_error)
- << "Cannot save model if not initialized yet, path: " << file_path
- << " format: " << static_cast<unsigned>(format);
-
- /// @todo this switch case should be delegating the function call only. It's
- /// not delegating for now as required logics are managable for now.
- switch (format) {
- case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
- auto model_file = checkedOpenStream<std::ofstream>(
- file_path, std::ios::out | std::ios::binary | std::ios::trunc);
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
- (*iter)->save(model_file);
- }
- if (opt && istrequal(opt->getType(), "adam")) {
- std::string adam = "adam";
- model_file.write(adam.c_str(), 4);
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
- iter++) {
- (*iter)->save(model_file, true);
- }
- }
-
- model_file.write((char *)&epoch_idx, sizeof(epoch_idx));
- model_file.write((char *)&iter, sizeof(iter));
-
- model_file.close();
- break;
- }
- case ml::train::ModelFormat::MODEL_FORMAT_INI:
- saveModelIni(file_path);
- break;
-
- case ml::train::ModelFormat::MODEL_FORMAT_INI_WITH_BIN: {
- auto old_save_path = std::get<props::SavePath>(model_flex_props);
- auto bin_file_name =
- file_path.substr(0, file_path.find_last_of('.')) + ".bin";
-
- std::get<props::SavePath>(model_flex_props).set(bin_file_name);
- save(file_path, ml::train::ModelFormat::MODEL_FORMAT_INI);
- save(bin_file_name, ml::train::ModelFormat::MODEL_FORMAT_BIN);
- std::get<props::SavePath>(model_flex_props) = old_save_path;
- break;
- }
- default:
- throw nntrainer::exception::not_supported(
- "saving with given format is not supported yet");
- }
-}
-
-void NeuralNetwork::load(const std::string &file_path,
- ml::train::ModelFormat format) {
- /// @todo this switch case should be delegating the function call only. It's
- /// not delegating for now as required logics are managable for now.
- switch (format) {
- case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
- NNTR_THROW_IF(!initialized, std::runtime_error)
- << "Cannot load if not initialized yet, path: " << file_path
- << " format: " << static_cast<unsigned>(format);
-
- auto model_file = checkedOpenStream<std::ifstream>(
- file_path, std::ios::in | std::ios::binary);
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
- (*iter)->read(model_file);
- }
- try {
- /// this is assuming that the failure is allowed at the end of the file
- /// read. so, after this line, additional read shouldn't be called
- if (opt && istrequal(opt->getType(), "adam")) {
- std::string opt_type;
- opt_type.resize(4);
- model_file.read((char *)&opt_type[0], 4);
- if (istrequal(opt_type, "adam")) {
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
- iter++) {
- (*iter)->read(model_file, true);
- }
- }
- }
-
- checkedRead(model_file, (char *)&epoch_idx, sizeof(epoch_idx),
- "[NeuralNetwork::readModel] failed to read epoch_idx");
- checkedRead(model_file, (char *)&iter, sizeof(iter),
- "[NeuralNetwork::readModel] failed to read iteration");
- } catch (...) {
- std::cerr << "failed to read additional data like optimizer variable, "
- "iteration, proceeding with default\n";
- }
-
- ml_logi("read modelfile: %s", file_path.c_str());
- break;
- }
- case ml::train::ModelFormat::MODEL_FORMAT_INI_WITH_BIN: {
- int ret = loadFromConfig(file_path);
- throw_status(ret);
- auto &save_path = std::get<props::SavePath>(model_flex_props);
- if (!save_path.empty()) {
- checkedOpenStream<std::ifstream>(save_path,
- std::ios::in | std::ios::binary);
- load_path = save_path;
- }
- break;
- }
- case ml::train::ModelFormat::MODEL_FORMAT_INI: {
- int ret = loadFromConfig(file_path);
- throw_status(ret);
- break;
- }
- case ml::train::ModelFormat::MODEL_FORMAT_FLATBUFFER: {
- break;
- }
- default:
- throw nntrainer::exception::not_supported(
- "loading with given format is not supported yet");
- }
-}
-
-float NeuralNetwork::getLoss() {
- loss = 0.0f;
-
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
- loss += (*iter)->getLoss();
- }
- return loss;
-}
-
-void NeuralNetwork::setLoss(float l) { loss = l; }
-
-NeuralNetwork &NeuralNetwork::copy(NeuralNetwork &from) {
- if (this != &from) {
- model_props = from.model_props;
- model_flex_props = from.model_flex_props;
- loss = from.loss;
- opt = from.opt;
-
- model_graph.copy(from.model_graph);
- }
- return *this;
-}
-
-/**
- * @brief Save the model configuration as an ini file
- * @param[in] file_path destination path; must not exist yet
- * @note Writes a "model" section, then an "optimizer" section when an
- *       optimizer is set, then one section per data buffer that reports
- *       itself serializable with METHOD_STRINGVECTOR. The layer graph is
- *       serialized into the same file afterwards by IniGraphInterpreter.
- * @throws std::invalid_argument (through NNTR_THROW_IF) when @a file_path
- *         already exists
- */
-void NeuralNetwork::saveModelIni(const std::string &file_path) {
-  NNTR_THROW_IF(isFileExist(file_path), std::invalid_argument)
-    << "There is already a file, overriding to the exisiting file is not "
-       "permitted, path: "
-    << file_path;
-
-  std::vector<IniSection> sections;
-
-  IniSection model_section = IniSection::FromExportable("model", *this);
-  model_section.setEntry("type", "NeuralNetwork");
-  sections.push_back(model_section);
-
-  // Appends a section built from *obj_ptr only when pred(obj_ptr) holds.
-  // `sections` is captured by reference so the pushes land in the vector
-  // that is handed to the IniWrapper below.
-  auto add_section_if_any = [&sections](const std::string &section_name,
-                                        auto obj_ptr, auto pred) {
-    if (pred(obj_ptr)) {
-      IniSection s = IniSection::FromExportable(section_name, *obj_ptr);
-      s.setEntry("type", obj_ptr->getType());
-      sections.push_back(s);
-    }
-  };
-
-  add_section_if_any("optimizer", opt,
-                     [](const auto &obj) { return static_cast<bool>(obj); });
-
-  auto &[train_buffer, valid_buffer, test_buffer] = data_buffers;
-  // A data buffer is exported only if it is set and can be serialized as a
-  // string vector.
-  auto data_buffer_valid = [](const auto &buffer) {
-    return buffer && buffer->isSerializable(
-                       ml::train::ExportMethods::METHOD_STRINGVECTOR);
-  };
-
-  add_section_if_any("train_set", train_buffer, data_buffer_valid);
-  add_section_if_any("valid_set", valid_buffer, data_buffer_valid);
-  add_section_if_any("test_set", test_buffer, data_buffer_valid);
-
-  IniWrapper wrapper("model_saver", sections);
-  wrapper.save_ini(file_path);
-
-  IniGraphInterpreter interpreter;
-  interpreter.serialize(graph_representation, file_path);
-}
-
-/**
- * @brief Check that the given inputs match the model input dimensions
- * @param[in] X list of input tensors
- * @retval true when the number of inputs and every input dimension match
- *         what getInputDimension() reports, false otherwise (the mismatch
- *         is logged)
- */
-bool NeuralNetwork::validateInput(sharedConstTensors X) {
-  auto required = getInputDimension();
-
-  if (X.size() != required.size()) {
-    ml_loge("Error: provided number of inputs %d, required %d",
-            static_cast<int>(X.size()), static_cast<int>(required.size()));
-    return false;
-  }
-
-  // Renders a dimension object through its stream operator for logging.
-  auto summarize = [](const auto &d) {
-    std::stringstream buf;
-    buf << d;
-    return buf.str();
-  };
-
-  for (unsigned int idx = 0; idx < required.size(); ++idx) {
-    if (required[idx] != X[idx]->getDim()) {
-      ml_loge("Error: provided input shape does not match required shape");
-      ml_loge("Provided tensor summary : %s",
-              summarize(X[idx]->getDim()).c_str());
-      ml_loge("Required tensor summary : %s",
-              summarize(required[idx]).c_str());
-      return false;
-    }
-  }
-
-  return true;
-}
-
-/**
- * @brief Run inference without labels
- * @param[in] X list of input tensors
- * @param[in] free_mem forwarded unchanged to the labelled overload
- * @retval list of output tensors
- */
-sharedConstTensors NeuralNetwork::inference(sharedConstTensors X,
-                                            bool free_mem) {
-  // Delegate to the labelled overload with an empty label list.
-  const sharedConstTensors no_labels{};
-  return inference(X, no_labels, free_mem);
-}
-
-/**
- * @brief Run inference with optional labels
- * @param[in] X list of input tensors; must not be empty
- * @param[in] label list of label tensors (may be empty)
- * @param[in] free_mem when true, tensors other than the weights are
- *            deallocated before returning
- * @retval list of output tensors
- * @throws std::invalid_argument when @a X is empty, starts with a null
- *         tensor, or does not pass validateInput()
- */
-sharedConstTensors NeuralNetwork::inference(sharedConstTensors X,
-                                            sharedConstTensors label,
-                                            bool free_mem) {
-  // X[0] is dereferenced below to pick up the batch size, so reject an empty
-  // or null-headed input list up front instead of invoking undefined
-  // behavior. The exception matches the one raised on validation failure.
-  if (X.empty() || X[0] == nullptr)
-    throw std::invalid_argument("Input validation failed.");
-
-  // The batch size is updated before validation so the dimensions compared
-  // in validateInput() are the ones for the current batch.
-  if (model_graph.getBatchSize() != X[0]->batch()) {
-    model_graph.setBatchSize(X[0]->batch());
-  }
-
-  sharedConstTensors out;
-  if (!validateInput(X))
-    throw std::invalid_argument("Input validation failed.");
-
-  allocate(ExecutionMode::INFERENCE);
-
-  int nn_foward;
-  PROFILE_TIME_REGISTER_EVENT(nn_foward, "nn_forward");
-  PROFILE_TIME_START(nn_foward);
-  out = forwarding(X, label, false);
-  PROFILE_TIME_END(nn_foward);
-
-  if (free_mem)
-    /**
-     * Free the memory needed for training before exiting.
-     * Note that this does not free the weights for the model.
-     * Weights of the model will be freed when the model is destroyed.
-     */
-    model_graph.deallocateTensors(false);
-
-  /** Clear the set inputs and labels */
-  model_graph.setInputsLabels({}, {});
-
-  return out;
-}
-
-/**
- * @brief Run inference on raw float buffers
- * @param[in] batch_size batch size applied to every input/label dimension
- * @param[in] input one buffer per model input, in model input order
- * @param[in] label one buffer per model output, or empty to run without
- *            labels
- * @retval one data pointer per output tensor
- * @throws std::invalid_argument when fewer input (or, if labels are given,
- *         label) buffers are supplied than the model has inputs (outputs)
- * @note Buffers beyond the number of model inputs/outputs are ignored.
- */
-std::vector<float *>
-NeuralNetwork::inference(unsigned int batch_size,
-                         const std::vector<float *> &input,
-                         const std::vector<float *> &label) {
-  sharedConstTensors input_tensors, output_tensors;
-  auto in_dim = getInputDimension();
-
-  // input[idx] is read for every model input below; a shorter vector would
-  // be indexed out of range.
-  if (input.size() < in_dim.size()) {
-    throw std::invalid_argument(
-      "[NeuralNetwork::inference] fewer input buffers given than the model "
-      "requires");
-  }
-
-  input_tensors.reserve(input.size());
-  for (unsigned int idx = 0; idx < in_dim.size(); idx++) {
-    in_dim[idx].batch(batch_size);
-    input_tensors.emplace_back(MAKE_SHARED_TENSOR(Tensor::Map(
-      input[idx], in_dim[idx].getDataLen() * sizeof(float), in_dim[idx], 0)));
-  }
-
-  if (!label.empty()) {
-    sharedConstTensors label_tensors;
-    auto label_dim = getOutputDimension();
-
-    // Same reasoning as for the inputs: label[idx] is read per model output.
-    if (label.size() < label_dim.size()) {
-      throw std::invalid_argument(
-        "[NeuralNetwork::inference] fewer label buffers given than the model "
-        "requires");
-    }
-
-    label_tensors.reserve(label.size());
-    for (unsigned int idx = 0; idx < label_dim.size(); idx++) {
-      label_dim[idx].batch(batch_size);
-      label_tensors.emplace_back(MAKE_SHARED_TENSOR(
-        Tensor::Map(label[idx], label_dim[idx].getDataLen() * sizeof(float),
-                    label_dim[idx], 0)));
-    }
-    output_tensors = inference(input_tensors, label_tensors, false);
-  } else {
-    output_tensors = inference(input_tensors, false);
-  }
-
-  std::vector<float *> output;
-  output.reserve(output_tensors.size());
-
-  for (auto &out : output_tensors) {
-    // A non-const copy is taken so getData() can be called on it.
-    // NOTE(review): presumably the copy shares the underlying buffer with
-    // the output tensor, otherwise the returned pointer would dangle --
-    // confirm against Tensor's copy semantics.
-    auto out_t = *out.get();
-    output.push_back(out_t.getData());
-  }
-
-  return output;
-}
-
-/**
- * @brief Register a dataset for the given mode
- * @param[in] mode dataset mode the dataset is registered for
- * @param[in] dataset dataset, cast to DataBuffer and passed to
- *            setDataBuffer()
- * @retval status returned by setDataBuffer()
- */
-int NeuralNetwork::setDataset(const DatasetModeType &mode,
-                              std::shared_ptr<ml::train::Dataset> dataset) {
-  auto buffer = std::static_pointer_cast<DataBuffer>(dataset);
-  return setDataBuffer(mode, buffer);
-}
-
-/**
- * @brief Release the currently allocated tensors and allocate them again for
- *        the given execution mode
- * @param[in] mode execution mode forwarded to the graph allocation
- * @retval #ML_ERROR_NONE always
- */
-int NeuralNetwork::allocate(ExecutionMode mode) {
-  const int status = ML_ERROR_NONE;
-
-  model_graph.deallocateTensors();
-  model_graph.allocateTensors(mode);
-
-  return status;
-}
-
-/**
- * @brief Deallocate the tensors of the model graph
- * @retval #ML_ERROR_NONE always
- * @note deallocateTensors() is called with `true` here, whereas the
- *       inference/train paths in this file pass `false` and document that
- *       as keeping the weights.
- */
-int NeuralNetwork::deallocate() {
-  const int status = ML_ERROR_NONE;
-
-  model_graph.deallocateTensors(true);
-
-  return status;
-}
-
-/**
- * @brief Check the training prerequisites, allocate memory for training and
- * run the training loop
- * @param[in] values training configuration, passed to setTrainConfig()
- * @param[in] stop_cb callback forwarded to train_run()
- * @param[in] stop_user_data user data forwarded to train_run()
- * @param[in] epoch_complete_cb callback forwarded to train_run()
- * @param[in] epoch_user_data user data forwarded to train_run()
- * @retval #ML_ERROR_NONE on success
- * @retval #ML_ERROR_INVALID_PARAMETER when no train data buffer or no
- * optimizer is set
- */
-int NeuralNetwork::train(const std::vector<std::string> &values,
- std::function<bool(void *)> stop_cb,
- void *stop_user_data,
- std::function<void(void *)> epoch_complete_cb,
- void *epoch_user_data) {
- int status = ML_ERROR_NONE;
-
- // Both checks run before any configuration is applied or memory allocated.
- if (data_buffers[static_cast<int>(DatasetModeType::MODE_TRAIN)] == nullptr) {
- ml_loge("Cannot initialize the model without the train data buffer.");
- return ML_ERROR_INVALID_PARAMETER;
- }
-
- if (!opt) {
- ml_loge("Cannot train network without optimizer.");
- return ML_ERROR_INVALID_PARAMETER;
- }
-
- setTrainConfig(values);
-
- /** set batch size just before training */
- model_graph.setBatchSize(
- std::get<props::TrainingBatchSize>(model_flex_props));
-
- status = allocate(ExecutionMode::TRAIN);
- // NOTE(review): NN_RETURN_STATUS presumably returns early when `status`
- // is not ML_ERROR_NONE; in that case the deallocation below is skipped --
- // confirm against the macro definition.
- NN_RETURN_STATUS();
-
- status =
- train_run(stop_cb, stop_user_data, epoch_complete_cb, epoch_user_data);
- NN_RETURN_STATUS();
-
- /**
- * Free the memory needed for training before exiting.
- * Note that this does not free the weights for the model.
- * Weights of the model will be freed when the model is destroyed.
- */
- model_graph.deallocateTensors(false);
- return status;
-}
-
-/**
- * @brief Run NeuralNetwork train with callback function by user
- * @param[in] stop_cb queried with @a stop_user_data before every epoch and
- * inside the per-iteration and per-epoch training callbacks
- * @param[in] stop_user_data user data handed to @a stop_cb
- * @param[in] epoch_complete_cb called with @a epoch_user_data after every
- * epoch
- * @param[in] epoch_user_data user data handed to @a epoch_complete_cb
- * @retval #ML_ERROR_NONE on success
- * @retval #ML_ERROR_INVALID_PARAMETER when there is no train data buffer
- * @note Each epoch runs the train buffer and, if set, the validation buffer.
- * The test buffer, if set, is evaluated once after the last epoch.
- */
-int NeuralNetwork::train_run(
- std::function<bool(void *userdata)> stop_cb, void *stop_user_data,
- std::function<void(void *userdata)> epoch_complete_cb,
- void *epoch_user_data) {
- int status = ML_ERROR_NONE;
-
- // Unless training is continued, restart the epoch/iteration counters and
- // clear the optimizer variables of every node.
- if (!std::get<props::ContinueTrain>(model_flex_props)) {
- epoch_idx = 0;
- iter = 0;
- // NOTE: this loop variable shadows the `iter` member reset just above.
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
- (*iter)->clearOptVar();
- }
- }
-
- auto batch_size = std::get<props::TrainingBatchSize>(model_flex_props);
-
- auto const &outputs = model_graph.getOutputTensors();
- auto in_dims = model_graph.getInputDimension();
- auto label_dims = model_graph.getOutputDimension();
-
- auto &[train_buffer, valid_buffer, test_buffer] = data_buffers;
-
- if (train_buffer == nullptr) {
- ml_loge("[NeuralNetworks] there is no train dataset!");
- return ML_ERROR_INVALID_PARAMETER;
- }
-
- /**
- * @brief run a single epoch with given callback, @a auto is used instead of
- * std::function for performance measure
- * @param buffer buffer to run
- * @param shuffle whether to shuffle or not
- * @param on_iteration_fetch function that will receive reference to stat,
- * buffer which will be called every time data is fetched and set
- * @param on_iteration_update_stat function that will receive reference to
- * stat, the model outputs and the labels, called after on_iteration_fetch
- * @param on_epoch_end function that will receive reference to stat,
- * buffer which will be called on the epoch end
- * @param stat statistics object that is reset, filled and returned
- */
- auto run_epoch = [this, &in_dims, &label_dims, &outputs, batch_size](
- DataBuffer *buffer, bool shuffle,
- auto &&on_iteration_fetch, auto &&on_iteration_update_stat,
- auto &&on_epoch_end, RunStats &stat) {
- /// @todo managing metrics must be handled here as well!! for now it is
- /// handled in individual callbacks
- // RunStats stat;
-
- stat.accuracy = 0.0;
- stat.loss = 0.0;
- stat.num_iterations = 0;
- stat.num_correct_predictions = 0;
- stat.max_epoch = getEpochs();
- stat.epoch_idx = epoch_idx;
-
- std::future<std::shared_ptr<IterationQueue>> future_iq =
- buffer->startFetchWorker(in_dims, label_dims, shuffle);
- // Consume iterations until the buffer hands out an empty view.
- while (true) {
- ScopedView<Iteration> iter_view = buffer->fetch();
- if (iter_view.isEmpty()) {
- break;
- }
- auto &iteration = iter_view.get();
- // Iterations whose batch differs from the training batch size are
- // skipped and therefore not counted in the statistics.
- if (iteration.batch() != batch_size) {
- /// @todo support partial batch
- continue;
- }
-
- auto const &labels = iteration.getLabelsRef();
- auto const &inputs = iteration.getInputsRef();
- model_graph.setInputsLabels(inputs, labels);
-
- on_iteration_fetch(stat, *buffer);
- on_iteration_update_stat(stat, outputs, labels);
- }
- future_iq.get();
- on_epoch_end(stat, *buffer);
-
- // Checked after on_epoch_end, so the epoch-end callback still runs (and
- // reports the empty epoch itself) before the exception is raised.
- if (stat.num_iterations == 0) {
- throw std::runtime_error("No data came while buffer ran");
- }
-
- return stat;
- };
-
- // Per-iteration training step: forward, backward, then progress report.
- auto train_for_iteration =
- [this, stop_cb, stop_user_data](RunStats &stat, DataBuffer &buffer) {
- forwarding(true, stop_cb, stop_user_data);
- backwarding(iter++, stop_cb, stop_user_data);
-
- // To avoid unconsidered memory leak, we need to clear the cache
- model_graph.flushCache();
-
- if (!stop_cb(stop_user_data)) {
- std::cout << "#" << epoch_idx << "/" << getEpochs();
- ml_logi("# %d / %d", epoch_idx, getEpochs());
- auto loss = getLoss();
- buffer.displayProgress(stat.num_iterations, loss);
- }
- };
-
- // Accumulates the current loss and counts the iteration; `outputs` and
- // `labels` are unused here but keep the signature shared with
- // update_eval_stat.
- auto update_train_stat = [this](RunStats &stat,
- const std::vector<Tensor> &outputs,
- const std::vector<Tensor> &labels) {
- stat.loss += getLoss();
- stat.num_iterations++;
- };
-
- // Averages the accumulated loss, saves the model to SavePath when one is
- // set and training was not stopped, and prints the epoch summary.
- auto train_epoch_end = [this, stop_cb, stop_user_data](RunStats &stat,
- DataBuffer &buffer) {
- if (stat.num_iterations != 0) {
- stat.loss /= static_cast<float>(stat.num_iterations);
- } else {
- std::cerr << "stat.num_iterations is 0" << std::endl;
- return;
- }
- auto &save_path = std::get<props::SavePath>(model_flex_props);
- if (!stop_cb(stop_user_data)) {
- if (!save_path.empty()) {
- save(save_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
- }
-
- std::cout << "#" << epoch_idx << "/" << getEpochs()
- << " - Training Loss: " << stat.loss;
- ml_logi("# %d / %d - Training Loss: %f", epoch_idx, getEpochs(),
- stat.loss);
- ml_logd("[NNTrainer] Training epoch %d / %d finished successfully.",
- epoch_idx, getEpochs());
- } else {
- ml_logd("[NNTrainer] Training stopped by stop callback function during "
- "epoch %d.",
- epoch_idx);
- }
- };
-
- // Per-iteration evaluation step: forward pass only (training == false).
- auto eval_for_iteration = [this, batch_size, stop_cb, stop_user_data](
- RunStats &stat, DataBuffer &buffer) {
- forwarding(false, stop_cb, stop_user_data);
- };
-
- // Counts a correct prediction for every batch entry where the argmax of the
- // first output equals the argmax of the first label, then updates the
- // loss/iteration counters like the training path.
- auto update_eval_stat = [batch_size, &update_train_stat](
- RunStats &stat, const std::vector<Tensor> &outputs,
- const std::vector<Tensor> &labels) {
- auto model_out = outputs[0].argmax();
- auto label_out = labels[0].argmax();
-
- for (unsigned int b = 0; b < batch_size; b++) {
- if (model_out[b] == label_out[b])
- stat.num_correct_predictions++;
- }
-
- update_train_stat(stat, outputs, labels);
- };
-
- // max_acc / min_loss live in the (mutable) closure, so they persist across
- // every call made through this lambda object.
- auto eval_epoch_end = [this, batch_size, max_acc = 0.0f,
- min_loss = std::numeric_limits<float>::max()](
- RunStats &stat, DataBuffer &buffer) mutable {
- if (stat.num_iterations != 0) {
- stat.loss /= static_cast<float>(stat.num_iterations);
- } else {
- std::cerr << "stat.num_iterations is 0" << std::endl;
- return;
- }
- // Accuracy in percent over all evaluated samples.
- stat.accuracy = stat.num_correct_predictions /
- static_cast<float>(stat.num_iterations * batch_size) *
- 100.0f;
-
- // A new best is a higher accuracy, or an equal accuracy with a lower loss.
- if (stat.accuracy > max_acc ||
- (stat.accuracy == max_acc && stat.loss < min_loss)) {
- max_acc = stat.accuracy;
- /// @note this is not actually 'the' min loss for whole time but records
- /// when data change
- min_loss = stat.loss;
- auto &save_best_path = std::get<props::SaveBestPath>(model_flex_props);
- if (!save_best_path.empty()) {
- save(save_best_path);
- }
- }
- std::cout << " >> [ Accuracy: " << stat.accuracy
- << "% - Validation Loss : " << stat.loss << " ]";
- ml_logi("[ Accuracy: %.2f %% - Validataion Loss: %.5f", stat.accuracy,
- stat.loss);
- };
-
- PROFILE_MEM_ANNOTATE("TRAIN START");
- auto epochs = getEpochs();
- ml_logd("[NNTrainer] Starts training. Current epoch: %d. Total epochs: %d.",
- epoch_idx + 1, getEpochs());
- // epoch_idx is 1-based while training; when the stop callback fires before
- // an epoch starts, the counter is rolled back to the last started epoch.
- for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
- if (stop_cb(stop_user_data)) {
- --epoch_idx;
- break;
- }
- training = run_epoch(train_buffer.get(), true, train_for_iteration,
- update_train_stat, train_epoch_end, training);
- if (valid_buffer) {
- validation = run_epoch(valid_buffer.get(), false, eval_for_iteration,
- update_eval_stat, eval_epoch_end, validation);
- }
- std::cout << '\n';
- epoch_complete_cb(epoch_user_data);
- }
- PROFILE_MEM_ANNOTATE("TRAIN END");
-
- if (test_buffer) {
- std::cout << "Evaluation with test data...\n";
- testing = run_epoch(test_buffer.get(), false, eval_for_iteration,
- update_eval_stat, eval_epoch_end, testing);
- }
-
- /** Clear the set inputs and labels */
- model_graph.setInputsLabels({}, {});
-
- return status;
-}
-
-/**
- * @brief Exchange the state of two networks member by member
- * @param[in,out] lhs first network
- * @param[in,out] rhs second network
- */
-void swap(NeuralNetwork &lhs, NeuralNetwork &rhs) {
-  // Make std::swap a candidate for the unqualified calls below.
-  using std::swap;
-
-  swap(lhs.model_props, rhs.model_props);
-  swap(lhs.model_flex_props, rhs.model_flex_props);
-  swap(lhs.load_path, rhs.load_path);
-  swap(lhs.epoch_idx, rhs.epoch_idx);
-  swap(lhs.iter, rhs.iter);
-  swap(lhs.loss, rhs.loss);
-  swap(lhs.opt, rhs.opt);
-  swap(lhs.data_buffers, rhs.data_buffers);
-  swap(lhs.initialized, rhs.initialized);
-  swap(lhs.model_graph, rhs.model_graph);
-  swap(lhs.graph_representation, rhs.graph_representation);
-  swap(lhs.compiled, rhs.compiled);
-  swap(lhs.loadedFromConfig, rhs.loadedFromConfig);
-}
-
-/**
- * @brief Add a layer node to the model
- * @param[in] layer node to add
- * @retval #ML_ERROR_NONE on success
- * @retval #ML_ERROR_NOT_SUPPORTED when the model is already initialized
- */
-int NeuralNetwork::addLayer(NodeType layer) {
-  if (initialized) {
-    return ML_ERROR_NOT_SUPPORTED;
-  }
-
-  /** The node is recorded both in the graph and in the representation */
-  model_graph.addLayer(layer);
-  graph_representation.push_back(layer);
-
-  return ML_ERROR_NONE;
-}
-
-/**
- * @brief Copy properties, loss, optimizer and the layer configuration from
- *        another network
- * @param[in] from network to copy the configuration from
- * @retval reference to this network
- * @note Layers are added through addLayer() from cloneConfiguration() of
- *       every node of the source graph.
- */
-NeuralNetwork &NeuralNetwork::copyConfiguration(NeuralNetwork &from) {
-  // Nothing to do when asked to copy from itself.
-  if (this == &from) {
-    return *this;
-  }
-
-  model_props = from.model_props;
-  model_flex_props = from.model_flex_props;
-  loss = from.loss;
-  opt = from.opt;
-
-  NetworkGraph source_graph = from.getNetworkGraph();
-  for (auto &source_node : source_graph.getLayerNodes()) {
-    auto cloned = static_cast<std::shared_ptr<ml::train::Layer>>(
-      source_node->cloneConfiguration());
-    addLayer(cloned);
-  }
-
-  return *this;
-}
-
-/**
- * @brief Get the layers between two named layers as reported by the model
- *        graph
- * @param[in] input_layer name forwarded to the graph query
- * @param[in] output_layer name forwarded to the graph query
- * @retval result of NetworkGraph::getUnsortedLayers()
- */
-NeuralNetwork::GraphType
-NeuralNetwork::getUnsortedLayers(const std::string &input_layer,
-                                 const std::string &output_layer) {
-  GraphType layers = model_graph.getUnsortedLayers(input_layer, output_layer);
-  return layers;
-}
-
-/**
- * @brief Set the optimizer of the model
- * @param[in] optimizer optimizer, stored after a cast to OptimizerWrapped
- * @retval #ML_ERROR_NONE on success
- * @retval #ML_ERROR_NOT_SUPPORTED when the model is already initialized
- */
-int NeuralNetwork::setOptimizer(
-  std::shared_ptr<ml::train::Optimizer> optimizer) {
-  if (!initialized) {
-    opt = std::static_pointer_cast<OptimizerWrapped>(optimizer);
-    return ML_ERROR_NONE;
-  }
-
-  return ML_ERROR_NOT_SUPPORTED;
-}
-
-/**
- * @brief Store a data buffer in the slot of the given dataset mode
- * @param[in] mode dataset mode, used as index into data_buffers
- * @param[in] data_buffer buffer to store
- * @retval #ML_ERROR_NONE on success
- * @retval #ML_ERROR_INVALID_PARAMETER when @a data_buffer is null
- */
-int NeuralNetwork::setDataBuffer(const DatasetModeType &mode,
-                                 std::shared_ptr<DataBuffer> data_buffer) {
-  if (!data_buffer) {
-    return ML_ERROR_INVALID_PARAMETER;
-  }
-
-  const auto slot = static_cast<int>(mode);
-  this->data_buffers[slot] = data_buffer;
-
-  return ML_ERROR_NONE;
-}
-
-/**
- * @brief Look up a layer of the model graph by name
- * @param[in] name name of the layer; must not be null
- * @param[out] layer receives the layer node cast to ml::train::Layer; must
- *             not be null
- * @retval #ML_ERROR_NONE on success
- * @retval #ML_ERROR_INVALID_PARAMETER when @a name or @a layer is null
- */
-int NeuralNetwork::getLayer(const char *name,
-                            std::shared_ptr<ml::train::Layer> *layer) {
-  // Both pointers are dereferenced below (std::string(name) and *layer), so
-  // null arguments are rejected instead of causing undefined behavior.
-  if (name == nullptr || layer == nullptr) {
-    return ML_ERROR_INVALID_PARAMETER;
-  }
-
-  // We provide the layer change through the api with user's responsibility.
-  //
-  // if (compiled) {
-  //   ml_loge("Cannot get compiled layer.");
-  //   return ML_ERROR_NOT_SUPPORTED;
-  // }
-
-  *layer = std::static_pointer_cast<ml::train::Layer>(
-    model_graph.getLayerNode(std::string(name)));
-  return ML_ERROR_NONE;
-}
-
-/**
- * @brief Print a single metric selected by the given summary flag
- * @param[in] out stream to print to
- * @param[in] flags summary selector; values other than the train loss,
- *            validation loss and validation accuracy selectors print nothing
- */
-void NeuralNetwork::printMetrics(std::ostream &out, unsigned int flags) {
-  // Every metric is written as a single line and flushed.
-  auto emit = [&out](const auto &value) { out << value << std::endl; };
-
-  switch (flags) {
-  case ML_TRAIN_SUMMARY_MODEL_TRAIN_LOSS:
-    emit(training.loss);
-    break;
-  case ML_TRAIN_SUMMARY_MODEL_VALID_LOSS:
-    emit(validation.loss);
-    break;
-  case ML_TRAIN_SUMMARY_MODEL_VALID_ACCURACY:
-    emit(validation.accuracy);
-    break;
-  default:
-    break;
-  }
-}
-
-/**
- * @brief Print the model according to a summary verbosity preset
- * @param[in] out stream to print to
- * @param[in] preset summary preset; metrics are printed first through
- *            printMetrics(), and presets above ML_TRAIN_SUMMARY_TENSOR stop
- *            there
- * @throws std::invalid_argument when @a preset is not above
- *         ML_TRAIN_SUMMARY_TENSOR and is none of the model/layer/tensor
- *         presets
- */
-void NeuralNetwork::printPreset(std::ostream &out, unsigned int preset) {
-  /** print neuralnet metrics */
-  printMetrics(out, preset);
-
-  if (preset > ML_TRAIN_SUMMARY_TENSOR) {
-    return;
-  }
-
-  LayerNode::PrintPreset node_preset = LayerNode::PrintPreset::PRINT_NONE;
-
-  switch (preset) {
-  case ML_TRAIN_SUMMARY_MODEL:
-    break;
-  case ML_TRAIN_SUMMARY_LAYER:
-    if (initialized) {
-      node_preset = LayerNode::PrintPreset::PRINT_SUMMARY;
-    } else {
-      node_preset = LayerNode::PrintPreset::PRINT_SUMMARY_META;
-    }
-    break;
-  case ML_TRAIN_SUMMARY_TENSOR:
-    node_preset = LayerNode::PrintPreset::PRINT_ALL;
-    break;
-  default:
-    throw std::invalid_argument("given verbosity is invalid");
-  }
-
-  ///@todo match flags with preset
-  const unsigned int flags = PRINT_INST_INFO | PRINT_GRAPH_INFO | PRINT_PROP |
-                             PRINT_OPTIMIZER | PRINT_METRIC;
-
-  print(out, flags, node_preset);
-}
-
-/**
- * @brief Add layers based on reference layers given as ml::train::Layer
- * @note Every reference is cast to LayerNode and the call is forwarded to
- *       the LayerNode overload with otherwise unchanged arguments.
- */
-void NeuralNetwork::addWithReferenceLayers(
-  const std::vector<std::shared_ptr<ml::train::Layer>> &reference,
-  const std::string &scope, const std::vector<std::string> &input_layers,
-  const std::vector<std::string> &start_layers,
-  const std::vector<std::string> &end_layers,
-  ml::train::ReferenceLayersType type,
-  const std::vector<std::string> &type_properties) {
-  std::vector<NodeType> layer_nodes;
-  layer_nodes.reserve(reference.size());
-
-  for (auto &ref_layer : reference) {
-    layer_nodes.emplace_back(std::static_pointer_cast<LayerNode>(ref_layer));
-  }
-
-  addWithReferenceLayers(layer_nodes, scope, input_layers, start_layers,
-                         end_layers, type, type_properties);
-}
-/**
- * @brief Add layers based on reference layer nodes
- * @param[in] reference nodes whose configuration is cloned
- * @param[in] scope when not empty, prefix ("scope/") applied to every node
- *            name that is not one of @a input_layers
- * @param[in] input_layers names turned into input connections
- * @param[in] start_layers names turned into start connections
- * @param[in] end_layers names turned into end connections
- * @param[in] type RECURRENT additionally applies a RecurrentRealizer
- * @param[in] type_properties properties handed to the RecurrentRealizer
- * @note The cloned nodes are run through the realizers in the order they
- *       are registered below and then added with addLayer().
- */
-void NeuralNetwork::addWithReferenceLayers(
-  const std::vector<std::shared_ptr<LayerNode>> &reference,
-  const std::string &scope, const std::vector<std::string> &input_layers,
-  const std::vector<std::string> &start_layers,
-  const std::vector<std::string> &end_layers,
-  ml::train::ReferenceLayersType type,
-  const std::vector<std::string> &type_properties) {
-  /// @todo below configuration should be extracted as a free function to make
-  /// it more testable, and reused inside graph interpreter
-
-  /// @note we can exploit connection to connection more fine grained, for now
-  /// it is not supported but we can easily make this supported
-  std::vector<std::shared_ptr<LayerNode>> nodes;
-  nodes.reserve(reference.size());
-  for (auto &ref_node : reference) {
-    nodes.push_back(ref_node->cloneConfiguration());
-  }
-
-  std::vector<Connection> start_conns(start_layers.begin(),
-                                      start_layers.end());
-  std::vector<Connection> input_conns(input_layers.begin(),
-                                      input_layers.end());
-  std::vector<Connection> end_conns(end_layers.begin(), end_layers.end());
-
-  std::vector<std::unique_ptr<GraphRealizer>> realizers;
-
-  realizers.emplace_back(new PreviousInputRealizer(start_conns));
-  realizers.emplace_back(new SliceRealizer(start_conns, end_conns));
-
-  if (!input_conns.empty()) {
-    realizers.emplace_back(new InputRealizer(start_conns, input_conns));
-  }
-
-  if (type == ml::train::ReferenceLayersType::RECURRENT) {
-    realizers.emplace_back(
-      new RecurrentRealizer(type_properties, input_conns, end_conns));
-  }
-
-  if (!scope.empty()) {
-    // Names matching an input connection are kept, all others get the
-    // scope prefix.
-    auto add_scope = [&scope, &input_conns](std::string &name) {
-      const bool is_input =
-        std::any_of(input_conns.begin(), input_conns.end(),
-                    [&name](auto &conn) { return conn.getName() == name; });
-      if (!is_input) {
-        name = scope + "/" + name;
-      }
-    };
-    realizers.emplace_back(new RemapRealizer(add_scope));
-  }
-
-  for (auto &realizer : realizers) {
-    nodes = realizer->realize(nodes);
-  }
-
-  for (auto &realized : nodes) {
-    addLayer(realized);
-  }
-}
-
-/**
- * @brief Export the model properties through the given exporter
- * @param[in] exporter exporter receiving model_props and model_flex_props,
- *            in that order
- * @param[in] method export method forwarded to the exporter
- */
-void NeuralNetwork::exportTo(Exporter &exporter,
-                             const ml::train::ExportMethods &method) const {
-  const NeuralNetwork *self = this;
-
-  exporter.saveResult(model_props, method, self);
-  exporter.saveResult(model_flex_props, method, self);
-}
-
-/**
- * @brief Print the model to the given stream
- * @param[in] out stream to print to
- * @param[in] flags bitwise OR of PRINT_* flags; only PRINT_GRAPH_INFO
- * produces output in this function, the other flags are placeholders
- * @param[in] layerPrintPreset preset handed to printPreset() of every node
- * @note When the graph is empty, "model is empty!" is printed and the
- * per-node output is skipped.
- */
-void NeuralNetwork::print(std::ostream &out, unsigned int flags,
- LayerNode::PrintPreset layerPrintPreset) {
- if (flags & PRINT_INST_INFO) {
- /// @todo uncomment this after implement getProperty (#1875)
- // out << "===================";
- // printInstance(out, this);
- }
-
- if (flags & PRINT_GRAPH_INFO) {
- // Table layout: four columns of width 20, separators of width 80.
- unsigned int total_col_size = 80;
- std::vector<unsigned int> column_size = {20, 20, 20, 20};
- // Prints one table row; layer_info is indexed for every column.
- auto print_graph_layer_info =
- [column_size](std::ostream &out, std::vector<std::string> layer_info) {
- // Strings that do not fit are cut to column_width - 1 characters.
- auto trim_string = [](std::string str, unsigned int column_width) {
- return str.size() < column_width ? str
- : str.substr(0, column_width - 1);
- };
-
- for (unsigned int i = 0; i < column_size.size(); ++i) {
- out << std::setw(column_size[i])
- << trim_string(layer_info[i], column_size[i]);
- }
- out << "\n";
- };
-
- out << std::string(total_col_size, '=') << '\n';
- print_graph_layer_info(
- out, {"Layer name", "Layer type", "Input dimension", "Input layer"});
- out << std::string(total_col_size, '=') << '\n';
- if (compiled) {
- props::GenericShape dim_property;
-
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
- iter++) {
- // First row of a node: name, type, first input dimension and first
- // input connection (empty strings when the node has none).
- std::string first_dim;
- if (iter->getInputDimensions().empty()) {
- first_dim = "";
- } else {
- dim_property.set(iter->getInputDimensions()[0]);
- first_dim = to_string(dim_property);
- }
- const std::vector<std::string> &input_layer_names =
- iter->getInputConnections();
- std::string first_input_name =
- input_layer_names.empty() ? "" : input_layer_names[0];
- print_graph_layer_info(
- out, {iter->getName(), iter->getType(), first_dim, first_input_name});
- // One additional row per remaining input connection.
- for (unsigned int i = 1; i < input_layer_names.size(); ++i) {
- dim_property.set(iter->getInputDimensions()[i]);
- print_graph_layer_info(
- out, {"", "", to_string(dim_property), input_layer_names[i]});
- }
- // '=' closes the table after the last node, '-' separates nodes.
- out << std::string(total_col_size,
- iter == model_graph.cend() - 1 ? '=' : '-')
- << '\n';
- }
- } else {
- auto &input_connection =
- std::get<std::vector<props::InputConnection>>(model_props);
- auto model_input = std::vector<Connection>(input_connection.begin(),
- input_connection.end());
- // NOTE: only referenced by the commented-out code further below.
- auto is_actually_an_input_node =
- [model_input](graph_const_iterator<LayerNode> node) {
- return node->hasInputShapeProperty() or
- std::any_of(model_input.begin(), model_input.end(),
- [node](auto &conn) {
- return node->getName() == conn.getName();
- });
- };
-
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
- iter++) {
- const std::vector<std::string> &input_layer_names =
- iter->getInputConnections();
-
- /// @brief connection information.
- // Intended comment.
- // std::string first_input_name =
- // input_layer_names.empty()
- // ? (is_actually_an_input_node(iter) || iter ==
- // model_graph.cbegin()
- // ? ""
- // : (iter - 1)->getName())
- // : input_layer_names[0];
- // Before compilation only name and type are printed; dimension and
- // input columns stay empty, including the rows for extra inputs.
- print_graph_layer_info(out, {iter->getName(), iter->getType(), "", ""});
- for (unsigned int i = 1; i < input_layer_names.size(); ++i) {
- print_graph_layer_info(out, {"", "", "", ""});
- }
- out << std::string(total_col_size,
- iter == model_graph.cend() - 1 ? '=' : '-')
- << '\n';
- }
- }
- }
-
- if (flags & PRINT_PROP) {
- /// @todo print neuralnet property
- /// @todo print mode (if it is eval or training)
- }
-
- if (flags & PRINT_OPTIMIZER) {
- /// @todo print optimizer (with print optimizer prop)
- }
-
- if (flags & PRINT_METRIC) {
- /// @todo print metric (currently it is done at printPreset as a
- /// workaround)
- /// @todo print loss function when it is not initialized. (if it is
- /// initialized, loss layer will be printed)
- }
-
- if (model_graph.empty()) {
- out << "model is empty!" << std::endl;
- return;
- }
-
- /** print layer properties */
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++)
- (*iter)->printPreset(out, layerPrintPreset);
-
- /// @todo Add status to check neuralnet has been run. #290
-}
-
-/**
- * @brief Call a function for every node of the model graph
- * @param[in] fn called with the node, its run context and @a user_data
- * @param[in] user_data opaque pointer handed to every call of @a fn
- */
-void NeuralNetwork::forEachLayer(
-  std::function<void(ml::train::Layer &, RunLayerContext &, void *)> fn,
-  void *user_data) {
-  auto node_it = model_graph.cbegin();
-  while (node_it != model_graph.cend()) {
-    auto layer_node = std::static_pointer_cast<LayerNode>(*node_it).get();
-    RunLayerContext &run_context = layer_node->getRunContext();
-    fn(*layer_node, run_context, user_data);
-    node_it++;
-  }
-}
-
-/**
- * @brief Export the model with the given export method
- * @param[in] method METHOD_TFLITE or METHOD_FLATBUFFER
- * @param[in] file_path destination path, used by the tflite export
- * @throws std::runtime_error for any other method, and for METHOD_TFLITE
- * when built without ENABLE_TFLITE_INTERPRETER
- */
-void NeuralNetwork::exports(const ml::train::ExportMethods &method,
- const std::string file_path) {
- switch (method) {
- case ml::train::ExportMethods::METHOD_TFLITE: {
-#ifdef ENABLE_TFLITE_INTERPRETER
- nntrainer::TfliteInterpreter interpreter;
-
- /// We will call "serialize" method for the model which is already trained
- /// or allocated. So, we need to call deallocateTensors first to make sure
- /// `dealloc_weights == false`
- model_graph.deallocateTensors();
- model_graph.allocateTensors(ExecutionMode::INFERENCE);
- interpreter.serialize(graph_representation, file_path);
- model_graph.deallocateTensors();
-#else
- throw std::runtime_error{
- "Export methods METHOD_TFLITE is not supported. Please enable tflite "
- "interpreter by set ENABLE_TFLITE_INTERPRETER=1"};
-#endif
- break;
- }
- case ml::train::ExportMethods::METHOD_FLATBUFFER: {
-
- // NOTE(review): this branch only re-allocates the tensors in TRAIN mode;
- // nothing is written to file_path here -- confirm whether serialization
- // is still to be implemented.
- model_graph.deallocateTensors();
- model_graph.allocateTensors(ExecutionMode::TRAIN);
- break;
- }
- default:
- throw std::runtime_error{"Unsupported export method"};
- }
-}
-} /* namespace nntrainer */
+++ /dev/null
-/**
- * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- *
- * @file neuralnet.h
- * @date 04 December 2019
- * @brief This is Neural Network Class
- * @see https://github.com/nnstreamer/nntrainer
- * @author Jijoong Moon <jijoong.moon@samsung.com>
- * @bug No known bugs except for NYI items
- *
- */
-#ifndef __NEURALNET_H__
-#define __NEURALNET_H__
-#ifdef __cplusplus
-
-#include <array>
-#include <map>
-#include <memory>
-#include <tuple>
-#include <vector>
-#ifdef PROFILE
-#include <chrono>
-#endif
-
-#include <app_context.h>
-#include <common_properties.h>
-#include <compiler_fwd.h>
-#include <dynamic_training_optimization.h>
-#include <execution_mode.h>
-#include <layer_node.h>
-#include <model_common_properties.h>
-#include <network_graph.h>
-#include <optimizer_wrapped.h>
-#include <tensor.h>
-
-#include <model.h>
-#include <nntrainer-api-common.h>
-#include <nntrainer_error.h>
-#include <node_exporter.h>
-
-/**
- * Forward declarations of ml::train dataset types used by the aliases
- * declared further down in this header.
- */
-namespace ml::train {
-// NOTE(review): spelled `DataSet`, while other code in this change refers to
-// `ml::train::Dataset` -- confirm which spelling is intended.
-class DataSet;
-enum class DatasetType;
-enum class DatasetModeType;
-} // namespace ml::train
-
-namespace nntrainer {
-
-class Exporter;
-
-/**
- * @brief Enumeration of Network Type
- */
-using NetType = ml::train::ModelType;
-
-class DataBuffer;
-using DatasetType = ml::train::DatasetType;
-using DatasetModeType = ml::train::DatasetModeType;
-using RunStats = ml::train::RunStats;
-
-/**
- * @class NeuralNetwork Class
- * @brief NeuralNetwork Class which has Network Configuration & Layers
- */
-class NeuralNetwork : public ml::train::Model {
- friend class ModelLoader; /** access private members of ModelLoader */
-
-public:
- using NodeType = std::shared_ptr<LayerNode>; /** Type of a Node */
- using GraphType = std::vector<NodeType>; /** actual graph type */
- using FlatGraphType =
- std::vector<NodeType>; /** topological sorted, iterable 1-D list of nodes */
- using NetworkGraphType = nntrainer::NetworkGraph;
-
-
- /**
- * @brief Constructor of NeuralNetwork Class
- */
- NeuralNetwork();
-
- /**
- * @brief Constructor of NeuralNetwork Class
- */
- NeuralNetwork(AppContext app_context_);
-
- /**
- * @brief Destructor of NeuralNetwork Class
- */
- ~NeuralNetwork();
-
- /**
- * @brief Get Loss from the previous ran batch of data
- * @retval loss value
- */
- float getLoss() override;
-
- /**
- * @brief returns compilation state of a network
- * @retval initialized value
- */
- bool getCompiled() const override { return compiled; }
-
- /**
- * @brief returns initialization state of a network
- * @retval initialized value
- */
- bool getInitialized() const override { return initialized; }
-
- /**
- * @brief returns loadedFromConfig state of a network
- * @retval loadedFromConfig value
- */
- bool getLoadedFromConfig() const override { return loadedFromConfig; }
-
- /**
- * @brief Get Loss from the previous epoch of training data
- * @retval loss value
- */
- float getTrainingLoss() override { return training.loss; }
-
- /**
- * @brief Get Loss from the previous epoch of validation data
- * @retval loss value
- */
- float getValidationLoss() override { return validation.loss; }
-
- RunStats getTrainingStats() override { return training; }
-
- RunStats getValidStats() override { return validation; }
-
- RunStats getTestStats() override { return testing; }
-
- /**
- * @brief Get Learning rate
- * @retval Learning rate
- *
- * @todo update to return the last used learning rate
- */
- float getLearningRate() { return opt->getLearningRate(0); };
-
- /**
- * @brief Create and load the Network with ini configuration file.
- * @param[in] config config file path
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int loadFromConfig(const std::string &config) override;
-
- /**
- * @brief Compile the graph in the model
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int compile() override;
-
- /**
- * @brief set Property of Network
- * @param[in] values values of property
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- void setProperty(const std::vector<std::string> &values) override;
-
- /**
- * @brief Initialize Network. This should be called after set all
- * hyperparameters.
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int initialize() override;
-
- /**
- * @brief Allocate memory for the model. This should be called after
- * initialize.
- * @param[in] exec_mode allocate memory based on the given execution mode
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int allocate(ExecutionMode mode = ExecutionMode::TRAIN);
-
- /**
- * @brief Deallocate memory for the model.
- * @param[in] trainable Assign memory for inference or train mode
- * @retval #ML_ERROR_NONE Successful.
- * @note This does not free the model graph but only the weight tensors, and
- * input/output/gradient/derivative tensors if any.
- */
- int deallocate();
-
- /**
- * @brief Update graph to make batch normalization in-place
- * @note This assumes that the batch normalization implementation does
- * not need input/output of itself while backwarding. The reason is that the
- * batch normalization layer caches a processed form of its own input than the
- * input tensor itself.
- * @note This optimization might break the working when some other
- * implementation of batch normalization layer is used or delegated to some
- * other backend. Ensure to verify this optimization with other
- * implementations once added.
- */
- void inPlaceOptimization(const std::string &layer_type);
-
- /**
- * @brief Forward Propagation of the neural network
- */
- sharedConstTensors forwarding(bool training = true,
- std::function<bool(void *userdata)> stop_cb =
- [](void *user_data) { return false; },
- void *user_data = nullptr);
-
- /**
- * @brief Forward Propagation of the neural network
- * @param[in] input List of Input Tensors taken by the neural network
- * @param[in] label List of Label Tensors for the model
- * @retval List of Output Tensors
- */
- sharedConstTensors forwarding(sharedConstTensors input,
- sharedConstTensors label = {},
- bool training = true);
-
- /**
- * @brief Backward Propagation of the neural network
- * @param[in] iteration Iteration Number for the optimizer
- */
- void backwarding(int iteration,
- std::function<bool(void *userdata)> stop_cb =
- [](void *user_data) { return false; },
- void *user_data = nullptr);
-
- /**
- * @copydoc Model::save(const std::string &file_path, ml::train::ModelFormat
- * format);
- */
- void save(const std::string &file_path,
- ml::train::ModelFormat format =
- ml::train::ModelFormat::MODEL_FORMAT_BIN) override;
-
- /**
- * @copydoc Model::load(const std::string &file_path, ml::train::ModelFormat
- * format);
- */
- void load(const std::string &file_path,
- ml::train::ModelFormat format =
- ml::train::ModelFormat::MODEL_FORMAT_BIN) override;
-
- /**
- * @brief get Epochs
- * @retval epochs
- */
- unsigned int getEpochs() {
- return std::get<props::Epochs>(model_flex_props);
- };
-
- /**
- * @brief get current epoch_idx
- * @retval current epoch_idx
- */
- unsigned int getCurrentEpoch() override;
-
- /**
- * @brief Copy Neural Network
- * @param[in] from NeuralNetwork Object to copy
- * @retval NeuralNetwork Object copied
- * @todo Need to implement the copy of graph core
- */
- NeuralNetwork &copy(NeuralNetwork &from);
-
- /**
- * @brief Copy Neural Network Configuration
- * @param[in] from NeuralNetwork Object to copy
- * @retval NeuralNetwork Object copied
- * @note This does not copy the context of neural network model. It only
- * copies the configuration of the network model. Therefore, it needs the
- * compile and initialization to run the model. Also, if you need the
- * initialized weights, a load call is required.
- */
- NeuralNetwork &copyConfiguration(NeuralNetwork &from);
-
- /**
- * @brief Run NeuralNetwork train
- * @param[in] values hyper parameters
- * @param[in] stop_cb callback function to decide stop training or not
- * ~~~~~
- * @a stop_user_data user_data to be used in stop_cb
- * @a bool true if stop the training
- * ~~~~~
- * @param[in] stop_user_data user_data to be used in stop_cb (default nullptr)
- * @param[in] epoch_complete_cb Called the end of an epoch.
- * ~~~~~
- * @a epoch_user_data user_data to be used in epoch_complete_cb
- * ~~~~~
- * @param[in] epoch_user_data user_data to be used in epoch_complete_cb
- * (default nullptr)
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- * @note NOTE(review): the default epoch_complete_cb lambda returns false
- * although the callback type is std::function<void(void *)>; std::function
- * discards that value.
- */
- int train(const std::vector<std::string> &values = {},
- std::function<bool(void *)> stop_cb =
- [](void *stop_user_data) { return false; },
- void *stop_user_data = nullptr,
- std::function<void(void *)> epoch_complete_cb =
- [](void *epoch_user_data) { return false; },
- void *epoch_user_data = nullptr) override;
-
- /**
- * @brief Run NeuralNetwork inference
- * @param[in] X input tensor
- * @param[in] free_mem true to free memory. used only in training mode.
- * @retval shared_ptr<const Tensor>
- */
- sharedConstTensors inference(sharedConstTensors X, bool free_mem = false);
-
- /**
- * @brief Run NeuralNetwork inference
- * @param[in] X input tensor
- * @param[in] label label tensor
- * @param[in] free_mem true to free memory. used only in training mode.
- * @retval shared_ptr<const Tensor>
- */
- sharedConstTensors inference(sharedConstTensors X, sharedConstTensors label,
- bool free_mem = false);
-
- /**
- * @brief Run the inference of the model
- * @param[in] batch batch size of current input
- * @param[in] input inputs as a list of each input data
- * @param[in] label labels as a list of each label data
- * @retval list of output as float *
- * @note The output memory must not be freed by the caller
- */
- std::vector<float *> inference(unsigned int batch,
- const std::vector<float *> &input,
- const std::vector<float *> &label) override;
-
- /**
- * @brief Set the dataset for the given dataset mode
- * @param[in] dt dataset mode (DatasetModeType) the dataset is set for
- * @param[in] dataset set the dataset
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int setDataset(const DatasetModeType &dt,
- std::shared_ptr<ml::train::Dataset> dataset) override;
-
- /**
- * @copydoc void forEachLayer(std::function<void(Layer &,
- * nntrainer::RunLayerContext &), void *user_data> fn);
- *
- */
- void forEachLayer(
- std::function<void(ml::train::Layer & /**< layer */,
- RunLayerContext & /**< rc */, void *user_data)>
- fn,
- void *user_data = nullptr) override;
-
- /**
- * @brief Set the data buffer for the given dataset mode
- * @param[in] dt dataset mode (DatasetModeType) the buffer is set for
- * @param[in] data_buffer set the databuffer
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int setDataBuffer(const DatasetModeType &dt,
- std::shared_ptr<DataBuffer> data_buffer);
-
- /**
- * @brief add layer into neural network model
- * @param[in] layer layer to add
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int addLayer(std::shared_ptr<ml::train::Layer> layer) override {
- return addLayer(std::static_pointer_cast<LayerNode>(layer));
- }
-
- /**
- * @brief add layer into neural network model
- * @param[in] layer layer (as NodeType) to add
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int addLayer(NodeType layer);
-
- /**
- * @brief set optimizer for the neural network model
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int setOptimizer(std::shared_ptr<ml::train::Optimizer> optimizer) override;
-
- /**
- * @brief get layer by name from neural network model
- * @param[in] name name of the layer to get
- * @param[out] layer shared_ptr to hold the layer to get
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- */
- int getLayer(const char *name,
- std::shared_ptr<ml::train::Layer> *layer) override;
-
- /**
- * @brief this function helps exporting the layer in a predefined format,
- * while working around an issue caused by templated function type erasure
- *
- * @param exporter exporter that contains exporting logic
- * @param method enum value to identify how it should be exported to
- */
- void exportTo(Exporter &exporter,
- const ml::train::ExportMethods &method) const;
-
- /**
- * @brief get input dimension of neural network
- * @retval std::vector<TensorDim> input dimension
- */
- std::vector<TensorDim> getInputDimension() override {
- if (!compiled) {
- throw std::logic_error("model should be compiled before get dimension");
- }
- return model_graph.getInputDimension();
- }
-
- /**
- * @brief get output dimension of neural network
- * @retval std::vector<TensorDim> output dimension
- */
- std::vector<TensorDim> getOutputDimension() override {
- if (!compiled) {
- throw std::logic_error("model should be compiled before get dimension");
- }
- return model_graph.getOutputDimension();
- }
-
- /**
- * @brief get FlatGraph of current graph
- * @note flat graph contains pointer to the actual nodes, which is not deeply
- * copied.
- * @retval flatGraph of the current graph
- * @note these layers will be in sorted order if the model is compiled,
- * otherwise the order is the order of addition of layers in the model.
- */
- FlatGraphType getFlatGraph() { return model_graph.getLayerNodes(); }
-
- /**
- * @brief get if the model is empty
- * @retval true if model_graph reports empty, else false
- */
- bool empty() const { return model_graph.empty(); }
-
- /**
- * @brief get the number of nodes in the model
- * @retval number of nodes, as reported by model_graph.size()
- */
- size_t size() const { return model_graph.size(); }
-
- /**
- * @brief get network graph
- * @retval NetworkGraphType the model_graph member
- */
- NetworkGraphType getNetworkGraph() { return model_graph; }
-
- /**
- * @brief get current graph from the model
- * @note graph contains pointer to the actual nodes, which is not deeply
- * copied.
- * @retval current graph
- */
- GraphType getUnsortedLayers(const std::string &input_layer = "",
- const std::string &output_layer = "");
-
- /**
- * @brief Summarize the model
- * @param out std::ostream to get the model summary
- * @param verbosity verbosity of the summary
- */
- virtual void summarize(std::ostream &out,
- ml_train_summary_type_e verbosity) override {
- printPreset(out, (unsigned int)verbosity);
- }
-
- /**
- * @brief Print Option when printing model info. The function delegates to the
- * `print`
- * @param out std::ostream to print
- * @param preset preset from `ml_train_summary_type_e`
- */
- virtual void printPreset(std::ostream &out, unsigned int preset);
-
- /**
- * @brief Enable dynamic fine-tuning optimization
- * @param threshold Comparison limit to decide if weight updated or not
- * @param op value forwarded to dynamic_training_opt.setOp(); defaults to
- * DynamicTrainingOptimization::dft_opt_norm. NOTE(review): the values "max"
- * and "norm" that this comment used to list under mode presumably belong to
- * this parameter - confirm against DynamicTrainingOptimization
- * @param mode dynamic fine-tuning optimization mode, forwarded to
- * dynamic_training_opt.setMode(); defaults to
- * DynamicTrainingOptimization::dft_opt_mode_derivative
- */
- void enableDynamicTraining(
- float threshold, std::string op = DynamicTrainingOptimization::dft_opt_norm,
- std::string mode = DynamicTrainingOptimization::dft_opt_mode_derivative) {
- dynamic_training_opt.setThreshold(threshold);
- dynamic_training_opt.setOp(op);
- dynamic_training_opt.setMode(mode);
- // enable() is called last, after threshold, op and mode have been set
- dynamic_training_opt.enable();
- }
-
- /**
- * @brief Disable dynamic fine-tuning optimization
- */
- void disableDynamicFineTuning() { dynamic_training_opt.disable(); }
-
- /**
- * @copydoc void ml::train::Model::addWithReferenceLayers(
- * const std::vector<std::shared_ptr<Layer>> &reference,
- * const std::string &scope, const std::vector<std::string> &input_layers,
- * const std::vector<std::string> &start_layers,
- * const std::vector<std::string> &end_layers, ReferenceLayersType type,
- * const std::vector<std::string> &type_properties = {})
- *
- */
- void addWithReferenceLayers(
- const std::vector<std::shared_ptr<ml::train::Layer>> &reference,
- const std::string &scope, const std::vector<std::string> &input_layers,
- const std::vector<std::string> &start_layers,
- const std::vector<std::string> &end_layers,
- ml::train::ReferenceLayersType type,
- const std::vector<std::string> &type_properties = {}) override;
-
- /**
- * @copydoc void ml::train::Model::addWithReferenceLayers(
- * const std::vector<std::shared_ptr<Layer>> &reference,
- * const std::string &scope, const std::vector<std::string> &input_layers,
- * const std::vector<std::string> &start_layers,
- * const std::vector<std::string> &end_layers, ReferenceLayersType type,
- * const std::vector<std::string> &type_properties = {})
- */
- void addWithReferenceLayers(
- const std::vector<std::shared_ptr<LayerNode>> &reference,
- const std::string &scope, const std::vector<std::string> &input_layers,
- const std::vector<std::string> &start_layers,
- const std::vector<std::string> &end_layers,
- ml::train::ReferenceLayersType type,
- const std::vector<std::string> &type_properties = {});
-
- /**
- * @brief export the model according to given export method
- * @param method export method
- * @param file_path path to be serialized
- */
- void exports(const ml::train::ExportMethods &method,
- const std::string file_path) override;
-
-private:
- using FlexiblePropTypes =
- std::tuple<props::Epochs, props::TrainingBatchSize, props::SavePath,
- props::ContinueTrain, props::SaveBestPath,
- props::MemoryOptimization, props::MemorySwap,
- props::MemorySwapPath, props::MemorySwapLookahead>;
- using RigidPropTypes =
- std::tuple<props::LossType, std::vector<props::InputConnection>,
- std::vector<props::LabelLayer>, props::ClipGradByGlobalNorm>;
-
- RigidPropTypes model_props; /**< model props */
- FlexiblePropTypes model_flex_props; /**< model train props */
- std::string load_path; /**< path to load weights when initialize */
-
- /**
- * @brief Print Options when printing layer info
- */
- typedef enum {
- // clang-format off
- PRINT_INST_INFO = (1 << 0), /**< Option to print type & instance address info */
- PRINT_GRAPH_INFO = (1 << 1), /**< Option to print graph topology info */
- PRINT_PROP = (1 << 2), /**< Option to print properties */
- PRINT_OPTIMIZER = (1 << 3), /**< Option to print optimizer */
- PRINT_METRIC = (1 << 4), /**< Option to print if current network is set to training */
- // clang-format on
- } PrintOption;
-
- unsigned int epoch_idx; /**< Number of epoch_idx */
-
- unsigned int iter; /**< iterations trained */
-
- float loss; /**< loss */
-
- std::shared_ptr<OptimizerWrapped> opt; /**< Optimizer; this gets copied into
- each layer, do not use this directly */
-
- std::array<std::shared_ptr<DataBuffer>, 3>
- data_buffers; /**< Data Buffers to get Input */
-
- bool initialized; /**< Network is initialized */
-
- bool compiled; /**< Network is compiled */
-
- bool loadedFromConfig; /**< Check if config is loaded to prevent load twice */
-
- RunStats validation; /** validation statistics of the model */
- RunStats training; /** training statistics of the model */
- RunStats testing; /** testing statistics of the model */
-
- AppContext app_context; /** Configurations bound to current app */
-
- NetworkGraph model_graph; /** Network Model Graph */
- GraphRepresentation graph_representation; /** Unsorted graph representation */
-
- DynamicTrainingOptimization dynamic_training_opt; /**< Dynamic fine-tuning
- optimization mode. supported modes are "max" and "norm" */
-
- /**
- * @brief save model in ini
- *
- * @param file_path file path
- */
- void saveModelIni(const std::string &file_path);
-
- /**
- * @brief print function for neuralnet
- * @param[in] out outstream
- * @param[in] flags bit combination of Neuralnet::PrintOption
- * @param[in] layerPrintPreset print preset when to print layer properties
- * (defaults to LayerNode::PrintPreset::PRINT_SUMMARY)
- */
- void print(std::ostream &out, unsigned int flags = 0,
- LayerNode::PrintPreset layerPrintPreset =
- LayerNode::PrintPreset::PRINT_SUMMARY);
-
- /**
- * @brief Set Loss
- * @param[in] l loss value
- */
- void setLoss(float l);
-
- /**
- * @brief Run NeuralNetwork train
- * @param[in] stop_cb callback function to decide stop training or not
- * @param[in] user_data opaque pointer, default nullptr. NOTE(review):
- * presumably handed to stop_cb - confirm in the implementation
- * @param[in] epoch_complete_cb Called the end of an epoch.
- * @param[in] data opaque pointer, default nullptr. NOTE(review): presumably
- * handed to epoch_complete_cb - confirm in the implementation
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
- * @note NOTE(review): the default epoch_complete_cb lambda returns false
- * although the callback type is std::function<void(void *)>; std::function
- * discards that value.
- */
- int train_run(std::function<bool(void *)> stop_cb =
- [](void *) { return false; },
- void *user_data = nullptr,
- std::function<void(void *)> epoch_complete_cb =
- [](void *) { return false; },
- void *data = nullptr);
-
- /**
- * @brief Swap function for the class
- */
- friend void swap(NeuralNetwork &lhs, NeuralNetwork &rhs);
-
- /**
- * @brief set Property/Configuration of Network for training after the
- * network has been initialized
- * @param[in] values values of property
- * @note declared void, so no ML_ERROR_* status code is returned to the caller
- */
- void setTrainConfig(const std::vector<std::string> &values);
-
- /**
- * @brief print metrics function for neuralnet
- * @param[in] out outstream
- * @param[in] flags verbosity from ml_train_summary_type_e
- */
- void printMetrics(std::ostream &out, unsigned int flags = 0);
-
- /**
- * @brief Match the given tensor shape with input shape of the model
- * @param[in] X input tensor
- * @retval true if matches, false is error
- */
- bool validateInput(sharedConstTensors X);
-};
-
-} /* namespace nntrainer */
-
-#endif /* __cplusplus */
-#endif /* __NEURALNET_H__ */
void sscal(const unsigned int N, const float alpha, void *X, const int incX,
ml::train::TensorDim::DataType d_type) {
+
+ if (d_type == ml::train::TensorDim::DataType::FP32) {
+
#ifdef USE_BLAS
#ifdef BLAS_NUM_THREADS
- openblas_set_num_threads(BLAS_NUM_THREADS);
-#endif
- if (d_type == ml::train::TensorDim::DataType::FP32)
+ openblas_set_num_threads(BLAS_NUM_THREADS);
+#endif // BLAS_NUM_THREADS
cblas_sscal(N, alpha, (float *)X, incX);
-#else
- if (d_type == ml::train::TensorDim::DataType::FP32) {
+#else // USE_BLAS else
sscal_raw(N, alpha, (float *)X, incX);
+#endif // USE_BLAS
} else if (d_type == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
sscal(N, alpha, (_FP16 *)X, incX);
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
}
-#endif
}
void sscal(const unsigned int N, const float alpha, float *X, const int incX) {
void saxpy(const unsigned int N, const float alpha, const void *X,
const int incX, void *Y, const int incY,
ml::train::TensorDim::DataType d_type) {
+ if (d_type == ml::train::TensorDim::DataType::FP32) {
#ifdef USE_BLAS
#ifdef BLAS_NUM_THREADS
- openblas_set_num_threads(BLAS_NUM_THREADS);
+ openblas_set_num_threads(BLAS_NUM_THREADS);
#endif
- cblas_saxpy(N, alpha, static_cast<const float *>(X), incX,
- static_cast<float *>(Y), incY);
+ cblas_saxpy(N, alpha, static_cast<const float *>(X), incX,
+ static_cast<float *>(Y), incY);
#else
- if (d_type == ml::train::TensorDim::DataType::FP32) {
saxpy_raw(N, alpha, static_cast<const float *>(X), incX,
static_cast<float *>(Y), incY);
+#endif
} else if (d_type == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
saxpy_FP16(N, alpha, static_cast<const _FP16 *>(X), incX,
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
}
-#endif
}
void saxpy(const unsigned int N, const float alpha, const float *X,
const float alpha, const void *A, const unsigned int lda,
const void *B, const unsigned int ldb, const float beta, void *C,
const unsigned int ldc, ml::train::TensorDim::DataType d_type) {
-#ifdef USE_CUBLAS
- int devID = 0;
- cudaDeviceProp deviceProp;
- cudaGetDeviceProperties(&deviceProp, devID);
- float *d_A, *d_B, *d_C;
-
- unsigned int size_A = M * K * sizeof(float);
- unsigned int size_B = K * N * sizeof(float);
- unsigned int size_C = M * N * sizeof(float);
- cudaMalloc((void **)&d_A, size_A);
- cudaMalloc((void **)&d_B, size_B);
- cudaMemcpy(d_A, A, size_A, cudaMemcpyHostToDevice);
- cudaMemcpy(d_B, B, size_B, cudaMemcpyHostToDevice);
- cudaMalloc((void **)&d_C, size_C);
-
- cublasHandle_t handle;
- cublasCreate(&handle);
-
- cublasOperation_t transA = (TransA == CblasTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
- cublasOperation_t transB = (TransB == CblasTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
- cublasSgemm(handle, transA, transB, N, M, K, &alpha, d_B, N, d_A, K, &beta,
- d_C, N);
+ if (d_type == ml::train::TensorDim::DataType::FP32) {
+#ifdef USE_CUBLAS
+ int devID = 0;
+ cudaDeviceProp deviceProp;
+ cudaGetDeviceProperties(&deviceProp, devID);
+ float *d_A, *d_B, *d_C;
+
+ unsigned int size_A = M * K * sizeof(float);
+ unsigned int size_B = K * N * sizeof(float);
+ unsigned int size_C = M * N * sizeof(float);
+
+ cudaMalloc((void **)&d_A, size_A);
+ cudaMalloc((void **)&d_B, size_B);
+ cudaMemcpy(d_A, A, size_A, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_B, B, size_B, cudaMemcpyHostToDevice);
+ cudaMalloc((void **)&d_C, size_C);
+
+ cublasHandle_t handle;
+ cublasCreate(&handle);
+
+ cublasOperation_t transA =
+ (TransA == CblasTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
+ cublasOperation_t transB =
+ (TransB == CblasTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
+ cublasSgemm(handle, transA, transB, N, M, K, &alpha, d_B, N, d_A, K, &beta,
+ d_C, N);
+
+ cudaMemcpy(C, d_C, size_C, cudaMemcpyDeviceToHost);
+ cublasDestroy(handle);
- cudaMemcpy(C, d_C, size_C, cudaMemcpyDeviceToHost);
- cublasDestroy(handle);
#elif defined USE_BLAS
+
#ifdef BLAS_NUM_THREADS
- openblas_set_num_threads(BLAS_NUM_THREADS);
+ openblas_set_num_threads(BLAS_NUM_THREADS);
#endif
- cblas_sgemm(order, TransA, TransB, M, N, K, alpha,
- static_cast<const float *>(A), lda, static_cast<const float *>(B),
- ldb, beta, static_cast<float *>(C), ldc);
+
+ cblas_sgemm(
+ order, TransA, TransB, M, N, K, alpha, static_cast<const float *>(A), lda,
+ static_cast<const float *>(B), ldb, beta, static_cast<float *>(C), ldc);
#else
- if (d_type == ml::train::TensorDim::DataType::FP32) {
sgemm_raw(order, TransA, TransB, M, N, K, alpha,
static_cast<const float *>(A), lda, static_cast<const float *>(B),
ldb, beta, static_cast<float *>(C), ldc);
+#endif
+
} else if (d_type == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
sgemm_FP16(
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
}
-#endif
-}
+}
void sgemm(CBLAS_ORDER order, CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB,
const unsigned int M, const unsigned int N, const unsigned int K,
void scopy(const unsigned int N, const void *X, const int incX, void *Y,
const int incY, ml::train::TensorDim::DataType d_type) {
+
+ if (d_type == ml::train::TensorDim::DataType::FP32) {
+
#ifdef USE_BLAS
#ifdef BLAS_NUM_THREADS
- openblas_set_num_threads(BLAS_NUM_THREADS);
+ openblas_set_num_threads(BLAS_NUM_THREADS);
#endif
- if (d_type == ml::train::TensorDim::DataType::FP32) {
cblas_scopy(N, (float *)X, incX, (float *)Y, incY);
- }
#else
- if (d_type == ml::train::TensorDim::DataType::FP32) {
scopy_raw(N, (float *)X, incX, (float *)Y, incY);
+#endif
+
} else if (d_type == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
scopy_FP16(N, (_FP16 *)X, incX, (_FP16 *)Y, incY);
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
}
-#endif
+
}
void scopy(const unsigned int N, const float *X, const int incX, float *Y,
const unsigned int lda, const void *X, const int incX,
const float beta, void *Y, const int incY,
ml::train::TensorDim::DataType d_type) {
+ if (d_type == ml::train::TensorDim::DataType::FP32) {
#ifdef USE_BLAS
#ifdef BLAS_NUM_THREADS
- openblas_set_num_threads(BLAS_NUM_THREADS);
+ openblas_set_num_threads(BLAS_NUM_THREADS);
#endif
- return cblas_sgemv(order, TransA, M, N, alpha, static_cast<const float *>(A),
- lda, static_cast<const float *>(X), incX, beta,
- static_cast<float *>(Y), incY);
+ return cblas_sgemv(
+ order, TransA, M, N, alpha, static_cast<const float *>(A), lda,
+ static_cast<const float *>(X), incX, beta, static_cast<float *>(Y), incY);
#else
- if (d_type == ml::train::TensorDim::DataType::FP32) {
+
return sgemv_raw(order, TransA, M, N, alpha, static_cast<const float *>(A),
lda, static_cast<const float *>(X), incX, beta,
static_cast<float *>(Y), incY);
+#endif
} else if (d_type == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
return sgemv_FP16(order, TransA, M, N, alpha, static_cast<const _FP16 *>(A),
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
}
-#endif
}
void sgemv(CBLAS_ORDER order, CBLAS_TRANSPOSE TransA, const unsigned int M,
* @brief This is Source for blas neon implementation
*
*/
+
#include <blas_neon.h>
#include <nntrainer_error.h>
const float32x4_t v_alpha = vdupq_n_f32(alpha);
if (cols % 16 == 0) {
- bool initialized[cols / 16];
+ unsigned int n = cols / 16;
+ bool *initialized = (bool *)malloc(sizeof(bool) * n);
unsigned int step;
for (unsigned int i = 0; i < cols / 16; ++i) {
initialized[i] = false;
vst1q_f32(&y[12], y12_15);
}
}
+ free(initialized);
return;
} else if (cols % 8 == 0) {
- bool initialized[cols / 8];
+ unsigned int n = cols / 8;
+ bool *initialized = (bool *)malloc(sizeof(bool) * n);
unsigned int step;
for (unsigned int i = 0; i < cols / 8; ++i) {
initialized[i] = false;
vst1q_f32(&y[4], y4_7);
}
}
+ free(initialized);
return;
} else if (cols % 4 == 0) {
- bool initialized[cols / 4];
+ unsigned int n = cols / 4;
+ bool *initialized = (bool *)malloc(sizeof(bool) * n);
+
unsigned int step;
for (unsigned int i = 0; i < cols / 4; ++i) {
initialized[i] = false;
vst1q_f32(&y[0], y0_3);
}
}
+ free(initialized);
}
+
return;
}
+#ifdef ENABLE_FP16
void sgemv_neon_fp16(const __fp16 *A, const __fp16 *X, __fp16 *Y, uint32_t rows,
uint32_t cols, float alpha, float beta) {
const __fp16 *__restrict x;
const float16x8_t v_alpha = vmovq_n_f16(alpha);
if (cols % 32 == 0) {
- bool initialized[cols / 32];
+ unsigned int n = cols / 32;
+ bool *initialized = (bool *)malloc(sizeof(bool) * n);
+
unsigned int step;
for (unsigned int i = 0; i < cols / 32; ++i) {
initialized[i] = false;
vst1q_f16(&y[24], y24_31);
}
}
+ free(initialized);
return;
} else if (cols % 16 == 0) {
- bool initialized[cols / 16];
+ unsigned int n = cols / 16;
+ bool *initialized = (bool *)malloc(sizeof(bool) * n);
+
unsigned int step;
for (unsigned int i = 0; i < cols / 16; ++i) {
initialized[i] = false;
vst1q_f16(&y[8], y8_15);
}
}
+ free(initialized);
return;
} else if (cols % 8 == 0) {
- bool initialized[cols / 8];
+ unsigned int n = cols / 8;
+ bool *initialized = (bool *)malloc(sizeof(bool) * n);
unsigned int step;
for (unsigned int i = 0; i < cols / 8; ++i) {
vst1q_f16(&y[0], y0_7);
}
}
+ free(initialized);
return;
}
}
return ret;
}
+#endif
} // namespace nntrainer::neon
uint32_t rows, uint32_t cols, float alpha,
float beta);
+#ifdef ENABLE_FP16
/**
* @brief sgemv computation with neon : Y = alpha*A*X + beta*Y
* @param[in] A __fp16 * for Matrix A
* @param[in] Y __fp16 * for Vector Y
*/
__fp16 sdot_neon_fp16(const unsigned int N, const __fp16 *X, const __fp16 *Y);
+#endif
} // namespace nntrainer::neon
setDist<float, std::normal_distribution<float>>(
std::normal_distribution<float>(mean, std));
} else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
setDist<_FP16, std::normal_distribution<float>>(
std::normal_distribution<float>(mean, std));
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
setDist<float, std::uniform_real_distribution<float>>(
std::uniform_real_distribution<float>(min, max));
} else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
setDist<_FP16, std::uniform_real_distribution<float>>(
std::uniform_real_distribution<float>(min, max));
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
setDist<float, std::bernoulli_distribution>(
std::bernoulli_distribution(probability));
} else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
setDist<_FP16, std::bernoulli_distribution>(
std::bernoulli_distribution(probability));
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
}
/// @todo add unittest
if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
auto f = std::bind(std::multiplies<float>(), std::placeholders::_1, value);
- return apply<float>(f, out);
+ apply<float>(f, out);
+ return out;
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
auto f = std::bind(std::multiplies<_FP16>(), std::placeholders::_1,
static_cast<_FP16>(value));
- return apply<_FP16>(f, out);
+ apply<_FP16>(f, out);
+ return out;
#else
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
- auto f = [&](const BroadcastInfo &e, const _FP16 *buf,
- const _FP16 *m_buf, _FP16 *out_buf) {
+ auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf,
+ _FP16 *out_buf) {
if (e.strides[3] == 1 && output.strides[3] == 1 && strides[3] == 1 &&
beta == 0.0) {
std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
auto f = std::bind(std::divides<float>(), std::placeholders::_1, value);
- return apply<float>(f, out);
+ apply<float>(f, out);
+ return out;
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
- auto f = std::bind(std::divides<_FP16>(), std::placeholders::_1, static_cast<_FP16>(value));
- return apply<_FP16>(f, out);
+ auto f = std::bind(std::divides<_FP16>(), std::placeholders::_1,
+ static_cast<_FP16>(value));
+ apply<_FP16>(f, out);
+ return out;
#else
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
apply_broadcast(m, f, output);
} else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
- auto f = [&](const BroadcastInfo &e, const _FP16 *buf,
- const _FP16 *m_buf, _FP16 *out_buf) {
+ auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf,
+ _FP16 *out_buf) {
if (e.strides[3] == 1 && output.strides[3] == 1 && strides[3] == 1) {
std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
std::divides<_FP16>());
/// @todo add unittest
if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
auto f = std::bind(std::plus<float>(), std::placeholders::_1, value);
- return apply<float>(f, out);
+ apply<float>(f, out);
+ return out;
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
auto f = std::bind(std::plus<_FP16>(), std::placeholders::_1,
static_cast<_FP16>(value));
- return apply<_FP16>(f, out);
+ apply<_FP16>(f, out);
+ return out;
#else
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
- auto f = [&](const BroadcastInfo &e, const _FP16 *buf,
- const _FP16 *m_buf, _FP16 *out_buf) {
+ auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf,
+ _FP16 *out_buf) {
saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]);
/// @todo: saxpy is not valid for _FP16
};
apply_broadcast(m, f, output);
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
- auto f = [&](const BroadcastInfo &e, const _FP16 *buf,
- const _FP16 *m_buf, _FP16 *out_buf) {
+ auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf,
+ _FP16 *out_buf) {
if (e.strides[3] == 1 && strides[3] == 1 && strides[3] == 1 &&
alpha == 0) {
std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
/// @todo add unittest
if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
auto f = std::bind(std::minus<float>(), std::placeholders::_1, value);
- return apply<float>(f, out);
+ apply<float>(f, out);
+ return out;
} else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
auto f = std::bind(std::minus<_FP16>(), std::placeholders::_1,
static_cast<_FP16>(value));
- return apply<_FP16>(f, out);
+ apply<_FP16>(f, out);
+ return out;
#else
ml_loge("%s", "Error: enable-fp16 is not enabled");
#endif
Tensor &Tensor::pow(float exponent, Tensor &out) const {
if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
auto f = [exponent](float in) { return powf(in, exponent); };
- return apply<float>(f, out);
+ apply<float>(f, out);
+ return out;
}
if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
auto f = [exponent](_FP16 in) {
return static_cast<_FP16>(powf(in, exponent));
};
- return apply<_FP16>(f, out);
+ apply<_FP16>(f, out);
+ return out;
#else
ml_loge("%s", "Error: enable-fp16 is not enabled");
#endif
ret_dims[i].width(), ret_dims[i].channel()};
}
- ret_t.apply_i<float>([&iter_value, &loc, &end_loc, &reset_dim_arr](float _) {
- return iter_value(loc, end_loc, reset_dim_arr);
- });
+ ret_t.apply_i<float>(
+ [&iter_value, &loc, &end_loc, &reset_dim_arr](float _) {
+ return iter_value(loc, end_loc, reset_dim_arr);
+ });
}
}
if (getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
- auto iter_value =
- [this, is_format_nchw](
- std::array<size_t, 4> &loc, const std::array<size_t, 4> &end_loc,
- const std::array<size_t, 4> &reset_dim_arr) -> _FP16 & {
+ auto iter_value = [this, is_format_nchw](
+ std::array<size_t, 4> &loc,
+ const std::array<size_t, 4> &end_loc,
+ const std::array<size_t, 4> &reset_dim_arr) -> _FP16 & {
auto &value = (is_format_nchw)
? getValue<_FP16>(loc[0], loc[1], loc[2], loc[3])
: getValue<_FP16>(loc[0], loc[3], loc[1], loc[2]);
ret_dims[i].width(), ret_dims[i].channel()};
}
- ret_t.apply_i<_FP16>([&iter_value, &loc, &end_loc, &reset_dim_arr](_FP16 _) {
- return iter_value(loc, end_loc, reset_dim_arr);
- });
+ ret_t.apply_i<_FP16>(
+ [&iter_value, &loc, &end_loc, &reset_dim_arr](_FP16 _) {
+ return iter_value(loc, end_loc, reset_dim_arr);
+ });
}
#else
} else if (ref_dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
auto iter_value =
- [is_format_nchw](
- std::array<unsigned, 4> &loc, const std::array<unsigned, 4> &start_loc,
- Tensor &t, const std::array<unsigned, 4> &ref_dim_arr) -> _FP16 & {
+ [is_format_nchw](std::array<unsigned, 4> &loc,
+ const std::array<unsigned, 4> &start_loc, Tensor &t,
+ const std::array<unsigned, 4> &ref_dim_arr) -> _FP16 & {
auto &value = is_format_nchw
? t.getValue<_FP16>(loc[0], loc[1], loc[2], loc[3])
: t.getValue<_FP16>(loc[0], loc[3], loc[1], loc[2]);
}
for (size_t i = 0u, sz = t.size(); i < sz; ++i) {
- iter_value(loc, start_loc, ret, tensor_dim_arr) =
- t.getValue<_FP16>(i);
+ iter_value(loc, start_loc, ret, tensor_dim_arr) = t.getValue<_FP16>(i);
}
if (is_format_nchw) {
BroadcastInfo e;
e.buffer_size = size();
e.strides[3] = 1;
- v_func(e, getData<_FP16>(), m.getData<_FP16>(),
- output.getData<_FP16>());
+ v_func(e, getData<_FP16>(), m.getData<_FP16>(), output.getData<_FP16>());
return;
}
_FP16 *rdata = ret.getData<_FP16>();
for (unsigned int k = 0; k < dim[0]; ++k) {
sgemv(CblasRowMajor, CblasTrans, t_axis, feat_len, 1,
- &data[k * dim.getFeatureLen()], feat_len,
- ones.getData<_FP16>(), 1, beta, &rdata[k * feat_len], 1);
+ &data[k * dim.getFeatureLen()], feat_len, ones.getData<_FP16>(),
+ 1, beta, &rdata[k * feat_len], 1);
}
}
} break;
_FP16 *rdata = ret.getData<_FP16>();
for (unsigned int k = 0; k < dim[0]; ++k) {
sgemv(CblasRowMajor, CblasTrans, t_axis, feat_len, 1,
- &data[k * dim.getFeatureLen()], feat_len,
- ones.getData<_FP16>(), 1, beta, &rdata[k * feat_len], 1);
+ &data[k * dim.getFeatureLen()], feat_len, ones.getData<_FP16>(),
+ 1, beta, &rdata[k * feat_len], 1);
}
} else {
unsigned int t_3 = dim[3];
/// (1 * K) X (1 * M) can be a case
/// case1: (1 * K) X (K * 1)
if (M == 1 && N == 1) {
- *rdata =
- sdot(K, data, 1, mdata, 1) + static_cast<_FP16>(beta) * (*rdata);
+ *rdata = sdot(K, data, 1, mdata, 1) + static_cast<_FP16>(beta) * (*rdata);
}
/// case2: (M * K) X (K * 1)
else if (N == 1) {
for (unsigned int l = 0; l < in.width(); ++l) {
output.setValue(i, j, k, l,
in.getValue<_FP16>(i, j, (in.height() - k - 1),
- (in.width() - l - 1)));
+ (in.width() - l - 1)));
}
}
}
Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
#ifdef ENABLE_FP16
+ /**
+ * @brief Constructor of Tensor
+ * @note This constructor copies the vector again; it needs refactoring
+ * @param[in] d data for the Tensor with batch size one
+ * @param[in] t_type tensor type (ml::train::TensorDim::TensorType) of the Tensor
+ */
Tensor(std::vector<std::vector<std::vector<std::vector<_FP16>>>> const &d,
ml::train::TensorDim::TensorType t_type) {
*/
Tensor &erf(Tensor &out) const;
+ /**
+ * @brief Get the size of this tensor's data type
+ * @note Forwards to dim.getDataTypeSize(); presumably the per-element size
+ *       in bytes of the configured data type -- TODO confirm in TensorDim
+ * @retval value reported by dim.getDataTypeSize()
+ */
unsigned int sizeofData() { return dim.getDataTypeSize(); }
/**
*/
void standardization_i();
+ /**
+ * @brief Get the address of the i-th data element
+ * @param[in] i index of the data element
+ * @retval address of ith data
+ */
template <typename T = float> T *getAddress(unsigned int i) {
size_t index = getIndex(batch(), channel(), height(), width());
if (i > index) {
/**
* @brief Apply function element by element
* @param[in] *function function pointer applied
- * @retval Tensor
- */
- template <typename T = float> Tensor apply(std::function<T(T)> f) const {
- Tensor result;
- return apply<T>(f, result);
- };
-
- /**
- * @brief Apply function element by element
- * @param[in] *function function pointer applied
* @param[out] output output tensor
* @retval Tensor
*/
return output;
};
+ /**
+ * @brief Apply function element by element and return the result as a new
+ *        Tensor
+ * @tparam T element type the function operates on (defaults to float)
+ * @param[in] f function applied to the elements
+ * @retval Tensor default-constructed here and filled by the overload that
+ *         takes an output tensor
+ */
+ template <typename T = float> Tensor apply(std::function<T(T)> f) const {
+ Tensor result;
+ apply<T>(f, result); // delegate to the overload taking an output tensor
+
+ return result;
+ };
+
// /**
// * @brief Apply instantly to the element
// *
// /**
// * @brief Apply function element by element
// * @param[in] *function function pointer applied
+ // * @retval Tensor
+ // */
+ // Tensor apply(std::function<float(float)> f) const {
+ // Tensor result;
+ // return apply(f, result);
+ // };
+
+ // /**
+ // * @brief Apply function element by element
+ // * @param[in] *function function pointer applied
// * @param[out] output output tensor
// * @retval Tensor
// */
return data->getAddr<T>() + offset + index;
}
+ /**
+ * @brief Set the data type of this tensor
+ * @param[in] d_type data type forwarded to dim.setDataType()
+ * @note This method itself only calls dim.setDataType(); it performs no
+ *       conversion of data that is already stored
+ */
void setDataType(Tdatatype d_type) { dim.setDataType(d_type); }
+ /**
+ * @brief Set the tensor type of this tensor
+ * @param[in] t_type tensor type forwarded to dim.setTensorType()
+ *            (presumably the format / data type pair -- TODO confirm against
+ *            ml::train::TensorDim::TensorType)
+ */
void setTensorType(ml::train::TensorDim::TensorType t_type) {
dim.setTensorType(t_type);
}
v_func,
Tensor &output) const;
#ifdef ENABLE_FP16
+ /**
+ * @brief Applies the given operator to the tensor with the passed argument
+ * @param[in] m Tensor
+ * @param[in] v_func vectorized function to apply
+ * @param e broadcast info.
+ * @param cur_axis current axis. pass default when calling outside.
+ * @param offset offset for this. pass default when calling outside.
+ * @param m_offset offset for m. pass default when calling outside.
+ * @retval #ML_ERROR_NONE Successful
+ * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
+ */
void
apply_broadcast_util(Tensor const &m,
std::function<void(const BroadcastInfo &e, const _FP16 *,
Tensor &output, const BroadcastInfo &e,
int cur_axis = -1, size_t offset = 0,
size_t m_offset = 0) const;
-
+ /**
+ * @brief Applies the given operator to the tensor with the passed argument
+ *
+ * @param[in] m Tensor
+ * @param[in] v_func vectorized function to apply
+ * @retval #ML_ERROR_NONE Successful
+ * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
+ */
void apply_broadcast(Tensor const &m,
std::function<void(const BroadcastInfo &e, const _FP16 *,
const _FP16 *, _FP16 *)>
new_weights.push_back(filter);
auto &bias_weight = *old_weights[1];
- TensorDim bias_dim{ml::train::TensorDim::Format::NCHW,
- std::bitset<4>(0b0001)};
+ TensorDim bias_dim{bias_weight.getTensorType(), std::bitset<4>(0b0001)};
bias_dim.setTensorDim(
3 /** index **/,
bias_weight
// create "perm" tensor for Transpose operator
// @todo : This NCHW format setting is just temporary; it needs to be set by
// global configuration
- TensorDim perm_dim{ml::train::TensorDim::Format::NCHW,
- std::bitset<4>(0b0001)};
+ TensorDim perm_dim{inputs[0]->getTensorType(), std::bitset<4>(0b0001)};
perm_dim.setTensorDim(3 /** index **/,
4 /** value **/); // effective dimension = {4}
new_inputs.emplace_back(perm_dim);
float i = 0;
t = t.apply((std::function<float(float)>)[&](float in) { return i++; });
} else if (t_type.data_type == nntrainer::Tdatatype::FP16) {
+#ifdef ENABLE_FP16
_FP16 i = 0;
t = t.apply((std::function<_FP16(_FP16)>)[&](_FP16 in) { return i++; });
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
return t;
if (t.getDataType() == ml::train::TensorDim::DataType::FP32) {
nntrainer::checkedRead(file, (char *)&sz, sizeof(unsigned));
} else if (t.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
nntrainer::checkedRead(file, (char *)&sz, sizeof(_FP16));
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
}
NNTR_THROW_IF(t.getDim().getDataLen() != sz, std::invalid_argument)
ml::train::TensorDim::DataType::FP16 &&
t2.getDim().getDataType() ==
ml::train::TensorDim::DataType::FP16) {
-
+#ifdef ENABLE_FP16
for (unsigned int idx = 0; idx < total; idx++) {
auto d1 = t1.getValue<_FP16>(idx);
auto d2 = t2.getValue<_FP16>(idx);
EXPECT_IN_RANGE(mean_squared_error, 0, epsilon);
return (weak_match == total);
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
} else
return false;
};
attention_shared_kv_batched,
attention_batched));
+#ifdef ENABLE_FP16
auto attention_shared_kv_fp16fp16 = LayerGoldenTestParamType(
nntrainer::createLayer<nntrainer::AttentionLayer>, {}, "1:1:5:7,1:1:3:7",
"attention_shared_kv_fp16fp16.nnlayergolden",
GTEST_PARAMETER_TEST(Attention16, LayerGoldenTest,
::testing::Values(attention_shared_kv_fp16fp16));
+#endif
LayerGoldenTestParamOptions::SKIP_CALC_GRAD,
"nhwc", "fp32", "fp32");
+GTEST_PARAMETER_TEST(FullyConnected, LayerGoldenTest,
+ ::testing::Values(fc_basic_plain, fc_basic_single_batch,
+ fc_basic_no_decay, fc_basic_plain_nhwc,
+ fc_basic_single_batch_nhwc,
+ fc_basic_no_decay_nhwc));
+
+#ifdef ENABLE_FP16
auto fc_basic_plain_fp16fp16 = LayerGoldenTestParamType(
nntrainer::createLayer<nntrainer::FullyConnectedLayer>, {"unit=5"},
"3:1:1:10", "fc_plain_fp16fp16.nnlayergolden",
"fc_plain_fp16fp16.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
"nchw", "fp16", "fp16");
-GTEST_PARAMETER_TEST(FullyConnected, LayerGoldenTest,
- ::testing::Values(fc_basic_plain, fc_basic_single_batch,
- fc_basic_no_decay, fc_basic_plain_nhwc,
- fc_basic_single_batch_nhwc,
- fc_basic_no_decay_nhwc,
- fc_basic_plain_fp16fp16,
+GTEST_PARAMETER_TEST(FullyConnected16, LayerGoldenTest,
+ ::testing::Values(fc_basic_plain_fp16fp16,
fc_basic_single_batch_fp16fp16,
fc_basic_no_decay_fp16fp16));
+#endif
#include <layers_common_tests.h>
#include <nnstreamer_layer.h>
-auto semantic_nnstreamer = LayerSemanticsParamType(
- nntrainer::createLayer<nntrainer::NNStreamerLayer>,
- nntrainer::NNStreamerLayer::type,
- {"model_path=../test/test_models/models/add.tflite"},
- LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
+// auto semantic_nnstreamer = LayerSemanticsParamType(
+// nntrainer::createLayer<nntrainer::NNStreamerLayer>,
+// nntrainer::NNStreamerLayer::type,
+// {"model_path=../test/test_models/models/add.tflite"},
+// LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
-GTEST_PARAMETER_TEST(NNStreamer, LayerSemantics,
- ::testing::Values(semantic_nnstreamer));
+// GTEST_PARAMETER_TEST(NNStreamer, LayerSemantics,
+// ::testing::Values(semantic_nnstreamer));
if [ ! -d builddir ]; then
#default value of openblas num threads is 1 for android
#enable-tflite-interpreter=false is just temporary until the CI system is stable
- meson builddir -Dplatform=android -Dopenblas-num-threads=1 -Denable-tflite-interpreter=false -Denable-tflite-backbone=false
+ meson builddir -Dplatform=android -Dopenblas-num-threads=1 -Denable-tflite-interpreter=false -Denable-tflite-backbone=false -Denable-fp16=true
else
echo "warning: $TARGET/builddir has already been taken, this script tries to reconfigure and try building"
pushd builddir
#default value of openblas num threads is 1 for android
#enable-tflite-interpreter=false is just temporary until the CI system is stable
- meson configure -Dplatform=android -Dopenblas-num-threads=1 -Denable-tflite-interpreter=false -Denable-tflite-backbone=false
+ meson configure -Dplatform=android -Dopenblas-num-threads=1 -Denable-tflite-interpreter=false -Denable-tflite-backbone=false -Denable-fp16=true
meson --wipe
popd
fi