From 4deea8df7f2acb863706aa68383415e68683d466 Mon Sep 17 00:00:00 2001 From: Jihoon Lee Date: Tue, 7 Sep 2021 18:51:14 +0900 Subject: [PATCH] [Props] Implement activation property This patch implements activation property by enum **Additional changes** - TimeDistribute property is now handled at finalize() **Self evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: Jihoon Lee --- nntrainer/graph/network_graph.cpp | 11 ++- nntrainer/layers/acti_func.h | 13 +--- nntrainer/layers/activation_layer.cpp | 79 +++++--------------- nntrainer/layers/activation_layer.h | 70 +++-------------- nntrainer/layers/centroid_knn.h | 1 - nntrainer/layers/common_properties.h | 47 ++++++++++-- nntrainer/layers/layer_node.cpp | 136 ++++++++++++---------------------- nntrainer/layers/layer_node.h | 36 +++------ nntrainer/utils/base_properties.h | 6 +- 9 files changed, 139 insertions(+), 260 deletions(-) diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp index 842f8a8..fb33e75 100644 --- a/nntrainer/graph/network_graph.cpp +++ b/nntrainer/graph/network_graph.cpp @@ -196,7 +196,9 @@ int NetworkGraph::realizeActivationType( lnode->setProperty({"distribute=true"}); } - lnode->setProperty({"activation=" + ActivationTypeStr[(unsigned int)act]}); + props::Activation act_prop; + act_prop.set(act); + lnode->setProperty({"activation=" + to_string(act_prop)}); in_node->setProperty({"activation=none"}); lnode->setInputLayers({in_node->getName()}); @@ -297,7 +299,6 @@ int NetworkGraph::addLossLayer(const std::string &loss_type_) { } void NetworkGraph::setOutputLayers() { - for (auto iter_idx = cbegin(); iter_idx != cend(); iter_idx++) { auto &layer_idx = *iter_idx; for (auto iter_i = cbegin(); iter_i != cend(); iter_i++) { @@ -354,15 +355,14 @@ int NetworkGraph::checkCompiledGraph() { } int NetworkGraph::realizeGraph() { - int status = ML_ERROR_NONE; addDefaultInputLayers(); /** * invariant: the new 
realized nodes are added to the end, - * otherwise this iteration becomes invalid. So, every iteration must be fresh - * iterator as vector resize invalidates all the iterators. + * otherwise this iteration becomes invalid. So, every iteration must be + * fresh iterator as vector resize invalidates all the iterators. */ for (unsigned int i = 0; i < graph.size(); ++i) { auto const &lnode = LNODE(*(cbegin() + i)); @@ -511,7 +511,6 @@ std::vector> NetworkGraph::getLayerNodes() const { void NetworkGraph::extendGraph(std::vector> ex_graph, std::string &prefix) { - if (compiled) throw std::runtime_error("Cannot modify graph after compile"); diff --git a/nntrainer/layers/acti_func.h b/nntrainer/layers/acti_func.h index b7c4e81..8aaf529 100644 --- a/nntrainer/layers/acti_func.h +++ b/nntrainer/layers/acti_func.h @@ -16,23 +16,12 @@ #define __ACTI_FUNC_H__ #ifdef __cplusplus +#include #include namespace nntrainer { /** - * @brief Enumeration of activation function type - */ -enum class ActivationType { - ACT_TANH, /** tanh */ - ACT_SIGMOID, /** sigmoid */ - ACT_RELU, /** ReLU */ - ACT_SOFTMAX, /** softmax */ - ACT_NONE, /** no op */ - ACT_UNKNOWN /** unknown */ -}; - -/** * @brief Activation enum to string map */ const std::array ActivationTypeStr = { diff --git a/nntrainer/layers/activation_layer.cpp b/nntrainer/layers/activation_layer.cpp index e0bd297..5ac2e1c 100644 --- a/nntrainer/layers/activation_layer.cpp +++ b/nntrainer/layers/activation_layer.cpp @@ -20,18 +20,29 @@ #include #include +#include #include #include #include +#include #include #include #include namespace nntrainer { +ActivationLayer::ActivationLayer() : + Layer(), + activation_props(new PropTypes(props::Activation())) { + acti_func.setActiFunc(ActivationType::ACT_NONE); +} static constexpr size_t SINGLE_INOUT_IDX = 0; void ActivationLayer::finalize(InitLayerContext &context) { + auto &act = std::get(*activation_props); + NNTR_THROW_IF(act.empty(), std::invalid_argument) + << "activation has not been 
set!"; + acti_func.setActiFunc(act.get()); context.setOutputDimensions(context.getInputDimensions()); } @@ -49,69 +60,15 @@ void ActivationLayer::calcDerivative(RunLayerContext &context) { acti_func.run_prime_fn(out, ret, deriv); } -void ActivationLayer::setProperty(const std::vector &values) { - /// @todo: deprecate this in favor of loadProperties - for (unsigned int i = 0; i < values.size(); ++i) { - std::string key; - std::string value; - std::stringstream ss; - - if (getKeyValue(values[i], key, value) != ML_ERROR_NONE) { - throw std::invalid_argument("Error parsing the property: " + values[i]); - } - - if (value.empty()) { - ss << "value is empty: key: " << key << ", value: " << value; - throw std::invalid_argument(ss.str()); - } - - /// @note this calls derived setProperty if available - setProperty(key, value); - } -} - -void ActivationLayer::setProperty(const std::string &type_str, - const std::string &value) { - using PropertyType = nntrainer::Layer::PropertyType; - nntrainer::Layer::PropertyType type = - static_cast(parseLayerProperty(type_str)); - - switch (type) { - case PropertyType::activation: { - acti_func.setActiFunc((ActivationType)parseType(value, TOKEN_ACTI)); - } break; - default: - std::string msg = - "[Layer] Unknown Layer Property Key for value " + std::string(value); - throw exception::not_supported(msg); - } -} - -int ActivationLayer::setActivation( - std::function const &activation_fn, - std::function const - &activation_prime_fn) { - acti_func.setActivation(activation_fn, activation_prime_fn); - - return ML_ERROR_NONE; +void ActivationLayer::exportTo(Exporter &exporter, + const ExportMethods &method) const { + exporter.saveResult(*activation_props, method, this); } -int ActivationLayer::setActivation( - std::function const &activation_fn, - std::function const &activation_prime_fn) { - - acti_func.setActivation(activation_fn, activation_prime_fn); - - return ML_ERROR_NONE; -} - -int ActivationLayer::setActivation( - std::function const 
&activation_fn, - std::function const &activation_prime_fn) { - - acti_func.setActivation(activation_fn, activation_prime_fn); - - return ML_ERROR_NONE; +void ActivationLayer::setProperty(const std::vector &values) { + auto left = loadProperties(values, *activation_props); + NNTR_THROW_IF(!left.empty(), std::invalid_argument) + << "Failed to set property"; } }; // namespace nntrainer diff --git a/nntrainer/layers/activation_layer.h b/nntrainer/layers/activation_layer.h index 637a8dc..52e1e2d 100644 --- a/nntrainer/layers/activation_layer.h +++ b/nntrainer/layers/activation_layer.h @@ -15,11 +15,18 @@ #define __ACTIVATION_LAYER_H__ #ifdef __cplusplus +#include + #include #include namespace nntrainer { +namespace props { +class ActivationType; + +} // namespace props + /** * @class Activation Layer * @brief Activation Layer @@ -30,9 +37,7 @@ public: /** * @brief Constructor of Activation Layer */ - ActivationLayer() : Layer() { - acti_func.setActiFunc(ActivationType::ACT_NONE); - } + ActivationLayer(); /** * @brief Destructor of Activation Layer @@ -62,8 +67,7 @@ public: /** * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method) */ - void exportTo(Exporter &exporter, - const ExportMethods &method) const override {} + void exportTo(Exporter &exporter, const ExportMethods &method) const override; /** * @copydoc Layer::getType() @@ -83,61 +87,11 @@ public: inline static const std::string type = "activation"; private: - ActiFunc - acti_func; /**< activation function designating the activation operation */ + using PropTypes = std::tuple; - /** - * @brief setActivation by custom activation function - * @note apply derivative as this activation_prime_fn does not utilize - * derivative - * @param[in] std::function activation_fn - * activation function to be used - * @param[in] std::function - * activation_prime_fn activation_prime_function to be used - * @retval #ML_ERROR_NONE when successful - */ - int setActivation( - std::function const &activation_fn, - 
std::function const &activation_prime_fn); + std::unique_ptr activation_props; /**< activation props */ - /** - * @brief setActivation by custom activation function - * @note derivative not applied here as this activation_prime_fn applies - * derivative itself - * @param[in] std::function activation_fn - * activation function to be used - * @param[in] std::function - * activation_prime_fn activation_prime_function to be used - * @retval #ML_ERROR_NONE when successful - */ - int setActivation( - std::function const &activation_fn, - std::function const - &activation_prime_fn); - - /** - * @brief setActivation by custom activation function - * @note apply derivative as this activation_prime_fn does not utilize - * derivative - * @param[in] std::function activation_fn activation - * function to be used - * @param[in] std::function activation_prime_fn - * activation_prime_function to be used - * @retval #ML_ERROR_NONE when successful - */ - int setActivation( - std::function const &activation_fn, - std::function const &activation_prime_fn); - - /** - * @brief setProperty by type and value separated - * @param[in] type property type to be passed - * @param[in] value value to be passed - * @exception exception::not_supported when property type is not valid for - * the particular layer - * @exception std::invalid_argument invalid argument - */ - void setProperty(const std::string &type, const std::string &value); + ActiFunc acti_func; /**< activation function from activation type */ }; } // namespace nntrainer diff --git a/nntrainer/layers/centroid_knn.h b/nntrainer/layers/centroid_knn.h index 83208d6..200493b 100644 --- a/nntrainer/layers/centroid_knn.h +++ b/nntrainer/layers/centroid_knn.h @@ -11,7 +11,6 @@ * @bug No known bugs except for NYI items * */ - #ifndef __CENTROID_KNN_H__ #define __CENTROID_KNN_H__ #include diff --git a/nntrainer/layers/common_properties.h b/nntrainer/layers/common_properties.h index afeb3d4..02d6783 100644 --- 
a/nntrainer/layers/common_properties.h +++ b/nntrainer/layers/common_properties.h @@ -10,18 +10,31 @@ * @author Jihoon Lee * @bug No known bugs except for NYI items */ - -#include +#ifndef __COMMON_PROPERTIES_H__ +#define __COMMON_PROPERTIES_H__ #include -#include #include +#include -#ifndef __COMMON_PROPERTIES_H__ -#define __COMMON_PROPERTIES_H__ +#include namespace nntrainer { +/** + * @brief Enumeration of activation function type + * @note Upon changing this enum, ActivationTypeInfo must be changed + * accordingly + */ +enum class ActivationType { + ACT_TANH, /** tanh */ + ACT_SIGMOID, /** sigmoid */ + ACT_RELU, /** ReLU */ + ACT_SOFTMAX, /** softmax */ + ACT_NONE, /** no op */ + ACT_UNKNOWN /** unknown */ +}; + namespace props { /** @@ -316,6 +329,30 @@ public: */ bool isValid(const unsigned int &v) const override; }; + +/******** below section is for enumerations ***************/ +/** + * @brief Enumeration of activation function type + */ +struct ActivationTypeInfo { + using Enum = nntrainer::ActivationType; + static constexpr std::initializer_list EnumList = { + Enum::ACT_TANH, Enum::ACT_SIGMOID, Enum::ACT_RELU, + Enum::ACT_SOFTMAX, Enum::ACT_NONE, Enum::ACT_UNKNOWN}; + + static constexpr const char *EnumStr[] = {"tanh", "sigmoid", "relu", + "softmax", "none", "unknown"}; +}; + +/** + * @brief Activation Enumeration Information + * + */ +class Activation final : public EnumProperty { +public: + using prop_tag = enum_class_prop_tag; + static constexpr const char *key = "activation"; +}; } // namespace props } // namespace nntrainer diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp index 25794cb..72cee01 100644 --- a/nntrainer/layers/layer_node.cpp +++ b/nntrainer/layers/layer_node.cpp @@ -28,7 +28,6 @@ namespace nntrainer { namespace props { -class ActivationType; /** * @brief Flatten property, true if needs flatten layer afterwards @@ -49,9 +48,6 @@ public: Distribute() : Property() {} static constexpr const char *key = 
"distribute"; using prop_tag = bool_prop_tag; - bool isValid(const bool &v) const { - return empty() || !get(); - } /**< distribute=true can be set strictly one time */ }; /** @@ -170,11 +166,11 @@ createLayerNode(std::unique_ptr &&layer, LayerNode::LayerNode(std::unique_ptr &&l) : layer(std::move(l)), - activation_type(ActivationType::ACT_NONE), run_context(nullptr), - layer_node_props(new PropsType(props::Name(), props::Flatten(), - props::Distribute(), props::Trainable(), {}, - {})), + layer_node_props(new PropsType(props::Name(), props::Distribute(), + props::Trainable(), {}, {})), + layer_node_props_realization( + new RealizationPropsType(props::Flatten(), props::Activation())), loss(new props::Loss()), regularization_loss(0.0f), exec_order({0, 0, 0}) { @@ -184,75 +180,10 @@ LayerNode::LayerNode(std::unique_ptr &&l) : } void LayerNode::setProperty(const std::vector &properties) { - bool already_distributed = - !std::get(*layer_node_props).empty() && - std::get(*layer_node_props).get(); auto left_properties = loadProperties(properties, *layer_node_props); - - /// note that setting distribute is only allowed for one time. 
- /// until we have layerNode::finalize and must not except timedist layer - if (getDistribute() && !already_distributed) { - auto &ac = nntrainer::AppContext::Global(); - std::unique_ptr dlayer = - ac.createObject(TimeDistLayer::type); - if (dlayer.get() == nullptr) - throw std::invalid_argument("Error creating time distribution layer"); - auto *time_dist_layer = dynamic_cast(dlayer.get()); - if (time_dist_layer == nullptr) - throw std::invalid_argument("Error casting to time distribution layer"); - time_dist_layer->setDistLayer(std::move(layer)); - layer = std::move(dlayer); - } - - std::vector remainder; - /// @todo: deprecate this in favor of loadProperties - for (unsigned int i = 0; i < left_properties.size(); ++i) { - - std::string key; - std::string value; - std::stringstream ss; - - if (getKeyValue(left_properties[i], key, value) != ML_ERROR_NONE) { - throw std::invalid_argument("Error parsing the property: " + - left_properties[i]); - } - - if (value.empty()) { - ss << "value is empty: key: " << key << ", value: " << value; - throw std::invalid_argument(ss.str()); - } - - /// @note this calls derived setProperty if available - if (!setProperty(key, value)) { - remainder.push_back(left_properties[i]); - } - } - - layer->setProperty(remainder); -} - -bool LayerNode::setProperty(const std::string &key, const std::string &value) { - using PropertyType = nntrainer::Layer::PropertyType; - - PropertyType type = static_cast(parseLayerProperty(key)); - switch (type) { - case PropertyType::activation: { - setActivation((ActivationType)parseType(value, TOKEN_ACTI)); - if (getType() == ActivationLayer::type) { - ml_logi("Set property delegated to activation layer"); - return false; - } - break; - } - case PropertyType::num_inputs: { - ml_logw("Deprecated property: %s", key.c_str()); - break; - } - default: - return false; - } - - return true; + left_properties = + loadProperties(left_properties, *layer_node_props_realization); + layer->setProperty(left_properties); } 
const std::string LayerNode::getName() const noexcept { @@ -279,7 +210,14 @@ std::ostream &operator<<(std::ostream &out, const LayerNode &l) { return out; } -ActivationType LayerNode::getActivationType() const { return activation_type; } +ActivationType LayerNode::getActivationType() const { + auto &act_prop = std::get(*layer_node_props_realization); + if (act_prop.empty()) { + return ActivationType::ACT_NONE; + } + + return act_prop; +} unsigned int LayerNode::getNumInputConnections() const { auto &input_layers = @@ -297,14 +235,7 @@ ActivationType LayerNode::getActivationToBeRealized() const { if (getType() == ActivationLayer::type) return ActivationType::ACT_NONE; else - return activation_type; -} - -void LayerNode::setActivation(ActivationType activation) { - if (activation == ActivationType::ACT_UNKNOWN) { - throw std::invalid_argument("Error:have to specify activation function"); - } - activation_type = activation; + return getActivationType(); } const std::string LayerNode::getType() const { return getLayer()->getType(); } @@ -314,7 +245,7 @@ bool LayerNode::getTrainable() const { } bool LayerNode::getFlatten() const { - auto &flatten = std::get(*layer_node_props); + auto &flatten = std::get(*layer_node_props_realization); if (flatten.empty()) { return false; } @@ -330,15 +261,15 @@ bool LayerNode::getDistribute() const { } const nntrainer::Layer *LayerNode::getLayer() const { - if (getDistribute()) - return ((TimeDistLayer *)(layer.get()))->getDistLayer(); + if (run_context && getDistribute()) + return static_cast(layer.get())->getDistLayer(); else return layer.get(); } nntrainer::Layer *LayerNode::getLayer() { - if (getDistribute()) - return ((TimeDistLayer *)(layer.get()))->getDistLayer(); + if (run_context && getDistribute()) + return static_cast(layer.get())->getDistLayer(); else return layer.get(); } @@ -439,9 +370,14 @@ void LayerNode::save(std::ofstream &file) const { * @brief Finalize creating the layer node */ InitLayerContext 
LayerNode::finalize(const std::vector &input_dims) { + /** Create init context right before finalize */ + if (run_context) + throw std::runtime_error("Finalizing a layer which is already finalized"); + std::vector actual_input_dims; auto &prop_dims = std::get>(*layer_node_props); + /** prepare input dimensions */ if (!input_dims.empty()) { actual_input_dims = input_dims; if (hasInputShapeProperty()) { @@ -456,6 +392,7 @@ InitLayerContext LayerNode::finalize(const std::vector &input_dims) { NNTR_THROW_IF(!hasInputShapeProperty(), std::invalid_argument) << "if input dims not given, input shapes must be given by the user as " "property"; + /// arguably, below check can go away NNTR_THROW_IF(prop_dims.size() != 1, std::invalid_argument) << "input shapes must be one if connection is not given but given " "dimesions size of: " @@ -475,6 +412,25 @@ InitLayerContext LayerNode::finalize(const std::vector &input_dims) { if (run_context) throw std::runtime_error("Finalizing a layer which is already finalized"); + /** manipulate layers if required */ + if (getType() == ActivationLayer::type) { + auto &act_prop = std::get(*layer_node_props_realization); + if (!act_prop.empty()) { + layer->setProperty({"activation=" + to_string(act_prop)}); + } + } + if (getType() != TimeDistLayer::type && getDistribute()) { + std::unique_ptr dlayer(new TimeDistLayer()); + NNTR_THROW_IF(!dlayer, std::invalid_argument) + << "Error creating time distribution layer"; + dlayer->setDistLayer(std::move(layer)); + layer = std::move(dlayer); + } + + /// remove flatten and activation since it's already realized + layer_node_props_realization = std::make_unique( + props::Flatten(), props::Activation()); + auto num_outputs = output_layers.size(); if (output_layers.empty()) { num_outputs = 1; diff --git a/nntrainer/layers/layer_node.h b/nntrainer/layers/layer_node.h index 258dfac..69f2307 100644 --- a/nntrainer/layers/layer_node.h +++ b/nntrainer/layers/layer_node.h @@ -28,7 +28,6 @@ #include #include 
-#include #include #include #include @@ -46,10 +45,10 @@ namespace props { class Name; class Distribute; class Flatten; -class ActivationType; class Loss; class InputLayer; class InputShape; +class Activation; } // namespace props /** @@ -597,8 +596,6 @@ private: layer; /**< The actual object in the graph node */ std::vector output_layers; /**< output layer names */ - ActivationType - activation_type; /**< activation applied to the output of this node */ std::unique_ptr run_context; /**< context required for running/execution of the layer. This @@ -607,30 +604,28 @@ upon final creation. Editing properties of the layer after init will not the properties in the context/graph unless intended. */ using PropsType = - std::tuple, std::vector>; + + using RealizationPropsType = std::tuple; + /** these realization properties result in addition of new layers, hence + * skipped in generation of model architecture as the corresponding layer + * itself is added. Distribute is also a property which is realized, but as it + * doesn't add a new layer, it is saved. */ + /** * These properties are set for the layer by the user but are intercepted * and used in the node which forms the basic element of the graph.
*/ std::unique_ptr layer_node_props; /**< properties for the node */ - std::unique_ptr loss; /**< loss */ + std::unique_ptr + layer_node_props_realization; /**< properties for the node */ + std::unique_ptr loss; /**< loss */ float regularization_loss; ExecutionOrder exec_order; /**< order/location of execution for this node in forward and backwarding operations */ /** - * @brief setProperty by PropertyType - * @note By passing empty string, this can validate if @a type is valid - * @param[in] key property type to be passed - * @param[in] value value to be passed, if empty string is passed, do nothing - * but throws error when @a type is invalid - * @return true if the property can be captured, else false - * @exception std::invalid_argument invalid argument - */ - bool setProperty(const std::string &key, const std::string &value); - - /** * @brief Get the effective layer managed by this layer node * * @details this is layer inside the distribution layer if this layer node @@ -647,13 +642,6 @@ properties in the context/graph unless intended. */ nntrainer::Layer *getLayer(); /** - * @brief Activation Setter - * @param[in] activation activation type - * @throw std::invalid_argument when ActivationType is unknown - */ - void setActivation(ActivationType activation); - - /** * @brief anchor point to override if PRINT_SHAPE_INFO is enabled for * Layer::print() */ diff --git a/nntrainer/utils/base_properties.h b/nntrainer/utils/base_properties.h index a186a6b..3cba6d1 100644 --- a/nntrainer/utils/base_properties.h +++ b/nntrainer/utils/base_properties.h @@ -9,6 +9,9 @@ * @author Jihoon Lee * @bug No known bugs except for NYI items */ +#ifndef __BASE_PROPERTIES_H__ +#define __BASE_PROPERTIES_H__ + #include #include #include @@ -20,9 +23,6 @@ #include #include -#ifndef __BASE_PROPERTIES_H__ -#define __BASE_PROPERTIES_H__ - /** base and predefined structures */ namespace nntrainer { -- 2.7.4