[ TOKEN ] Update weight decay to weight regularizer
author    jijoong.moon <jijoong.moon@samsung.com>
Wed, 2 Sep 2020 03:20:22 +0000 (12:20 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Wed, 2 Sep 2020 22:36:52 +0000 (07:36 +0900)
Change Weight_Decay to Weight_Regularizer
Change Weight_Decay_Lambda to Weight_Regularizer_Constant
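
As a rough illustration (not part of the diff), the renamed keys look like this through the C API; everything except the two regularizer properties mirrors the call sites updated below, and the include path is an assumption:

```cpp
/* Sketch only: mirrors the ml_train_layer_set_property call sites updated
 * below; model setup and most error handling are omitted. */
#include <nntrainer.h>

int set_fc_regularizer(void) {
  ml_train_layer_h fc_layer;
  int status = ml_train_layer_create(&fc_layer, ML_TRAIN_LAYER_TYPE_FC);
  if (status != ML_ERROR_NONE)
    return status;

  status = ml_train_layer_set_property(
    fc_layer, "unit=10", "activation=softmax", "bias_initializer=zeros",
    "weight_regularizer=l2norm",         /* was: weight_decay=l2norm */
    "weight_regularizer_constant=0.005", /* was: weight_decay_lambda=0.005 */
    "weight_initializer=xavier_uniform", NULL);

  ml_train_layer_destroy(fc_layer);
  return status;
}
```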

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
16 files changed:
Applications/Tizen_CAPI/capi_file.c
Applications/Tizen_CAPI/capi_func.c
README.md
api/capi/include/nntrainer.h
docs/configuration-ini.md
nntrainer/include/layer.h
nntrainer/include/optimizer.h
nntrainer/include/parse_util.h
nntrainer/src/conv2d_layer.cpp
nntrainer/src/fc_layer.cpp
nntrainer/src/layer.cpp
nntrainer/src/parse_util.cpp
test/include/nntrainer_test_util.h
test/tizen_capi/unittest_tizen_capi.cpp
test/tizen_capi/unittest_tizen_capi_layer.cpp
test/unittest/unittest_nntrainer_layers.cpp

index e9686ed..ec090e6 100644 (file)
--- a/Applications/Tizen_CAPI/capi_file.c
+++ b/Applications/Tizen_CAPI/capi_file.c
@@ -68,7 +68,7 @@ int main(int argc, char *argv[]) {
   /* set property for fc layer */
   status = ml_train_layer_set_property(
     layers[1], "unit= 10", "activation=softmax", "bias_initializer=zeros",
-    "weight_decay=l2norm", "weight_decay_lambda=0.005",
+    "weight_regularizer=l2norm", "weight_regularizer_constant=0.005",
     "weight_initializer=xavier_uniform", NULL);
   NN_RETURN_STATUS();
 
index 878922d..31073d1 100644 (file)
--- a/Applications/Tizen_CAPI/capi_func.c
+++ b/Applications/Tizen_CAPI/capi_func.c
@@ -321,7 +321,7 @@ int main(int argc, char *argv[]) {
   /* set property for fc layer */
   status = ml_train_layer_set_property(
     layers[1], "unit= 10", "activation=softmax", "bias_initializer=zeros",
-    "weight_decay=l2norm", "weight_decay_lambda=0.005",
+    "weight_regularizer=l2norm", "weight_regularizer_constant=0.005",
     "weight_initializer=xavier_uniform", NULL);
   NN_RETURN_STATUS();
 
index b05411c..76bd5ee 100644 (file)
--- a/README.md
+++ b/README.md
@@ -76,7 +76,7 @@ NNTrainer provides
  | relu | relu function | set as layer propery |
  | softmax | softmax function | set as layer propery |
  | weight_initializer | Weight Initialization | Xavier(Normal/Uniform), LeCun(Normal/Uniform),  HE(Normal/Unifor) |
- | weight_decay | weight decay ( L2Norm only ) | needs set weight_decay_param & type |
+ | weight_regularizer | weight decay ( L2Norm only ) | needs set weight_regularizer_constant & type |
  | learnig_rate_decay | learning rate decay | need to set step |
 
 ### Tensor
@@ -100,7 +100,7 @@ NNTrainer provides
  | Keyward | Loss Name | Description |
  |:-------:|:---:|:---|
  | weight_initializer | Weight Initialization | Xavier(Normal/Uniform), LeCun(Normal/Uniform),  HE(Normal/Unifor) |
- | weight_decay | weight decay ( L2Norm only ) | needs set weight_decay_param & type |
+ | weight_regularizer | weight decay ( L2Norm only ) | needs set weight_regularizer_constant & type |
  | learnig_rate_decay | learning rate decay | need to set step |
 
 ### APIs
index eca2ae8..15cb85e 100644 (file)
--- a/api/capi/include/nntrainer.h
+++ b/api/capi/include/nntrainer.h
@@ -289,7 +289,7 @@ int ml_train_layer_destroy(ml_train_layer_h layer);
  * // Many of these hyperparmeters are optional
  * status = ml_train_layer_set_property(handle, "input_shape=1:1:6270",
  *      "unit=10", "bias_initializer=zeros", "activation=sigmoid",
- *      "weight_decay=l2_norm", "weight_initializer=he_uniform", NULL);
+ *      "weight_regularizer=l2_norm", "weight_initializer=he_uniform", NULL);
  * if (status != ML_ERROR_NONE) {
  *    // Handle error case
  *    ml_train_layer_destroy(handle);
index 96d0fc6..8d32f12 100644 (file)
--- a/docs/configuration-ini.md
+++ b/docs/configuration-ini.md
@@ -168,7 +168,7 @@ Start with "[ ${layer name} ]". This layer name must be unique throughout networ
    set weight decay
      * l2norm : L2 normalization
 
-9. ```weight_decay_lambda = <float>```
+9. ```weight_regularizer_constant = <float>```
 
    coefficient for weight decay
 
@@ -227,10 +227,10 @@ Each layer requires different properties.
 
  | Layer | Properties |
  |:-------:|:---|
- | conv2d |<ul><li>filters</li><li>kernel_size</li><li>stride</li><li>padding</li><li>normalization</li><li>standardization</li><li>input_shape</li><li>bias_init_zero</li><li>activation</li><li>flatten</li><li>weight_decay</li><li>weight_decay_lambda</li><li>weight_initializer</li></ul>|
+ | conv2d |<ul><li>filters</li><li>kernel_size</li><li>stride</li><li>padding</li><li>normalization</li><li>standardization</li><li>input_shape</li><li>bias_init_zero</li><li>activation</li><li>flatten</li><li>weight_regularizer</li><li>weight_regularizer_constant</li><li>weight_initializer</li></ul>|
  | pooling2d | <ul><li>pooling</li><li>pool_size</li><li>stride</li><li>padding</li></ul> |
  | flatten | - |
- | fully_connected | <lu><li>unit</li><li>normalization</li><li>standardization</li><li>input_shape</li><li>bias_initializer</li><li>activation</li><li>flatten</li><li>weight_decay</li><li>weight_decay_lambda</li><li>weight_initializer</li></lu>|
+ | fully_connected | <lu><li>unit</li><li>normalization</li><li>standardization</li><li>input_shape</li><li>bias_initializer</li><li>activation</li><li>flatten</li><li>weight_regularizer</li><li>weight_regularizer_constant</li><li>weight_initializer</li></lu>|
  | input | <lu><li>normalization </li><li>standardization</li><li>input_shape</li><li>flatten</li></lu>|
  | batch_normalization | <lu><li>epsilon</li><li>flatten</li></lu> |
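
For the INI side documented above, a hedged sketch of the renamed keys as they would appear in a layer section, embedded as a string the same way the unit-test configurations later in this commit embed theirs; the section name and the non-regularizer lines are placeholders, only the two regularizer keys come from this change:

```cpp
#include <string>

/* Hypothetical layer section: only Weight_Regularizer and
 * Weight_Regularizer_Constant are the keys renamed by this commit;
 * the section name and remaining properties are illustrative. */
const std::string fc_section = "[fclayer]"
                               "\n"
                               "Unit = 10"
                               "\n"
                               "Activation = softmax"
                               "\n"
                               "Weight_Regularizer = l2norm"
                               "\n"
                               "Weight_Regularizer_Constant = 0.005"
                               "\n";
```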
 
index 38c5b63..0a74167 100644 (file)
--- a/nntrainer/include/layer.h
+++ b/nntrainer/include/layer.h
@@ -143,7 +143,7 @@ public:
     type(LAYER_UNKNOWN),
     loss(0.0f),
     activation_type(ACT_NONE),
-    weight_decay(),
+    weight_regularizer(),
     weight_initializer(WEIGHT_XAVIER_UNIFORM),
     bias_initializer(WEIGHT_ZEROS),
     flatten(false),
@@ -223,8 +223,8 @@ public:
    *            3. standardization : bool
    *            4. activation : string (type)
    *            5. epsilon : float
-   *            6. weight_decay : string (type)
-   *            7. weight_decay_lambda : float
+   *            6. weight_regularizer : string (type)
+   *            7. weight_regularizer_constant : float
    *            8. unit : int
    *            9. weight_initializer : string (type)
    *            10. filter_size : int
@@ -245,8 +245,8 @@ public:
     standardization = 2,
     activation = 3,
     epsilon = 4,
-    weight_decay = 5,
-    weight_decay_lambda = 6,
+    weight_regularizer = 5,
+    weight_regularizer_constant = 6,
     unit = 7,
     weight_initializer = 8,
     bias_initializer = 9,
@@ -328,7 +328,9 @@ public:
    * @brief     set weight decay parameters
    * @param[in] w struct for weight decay
    */
-  void setWeightDecay(WeightDecayParam w) { weight_decay = w; }
+  void setWeightRegularizer(WeightRegularizerParam w) {
+    weight_regularizer = w;
+  }
 
   /**
    * @brief  set Weight Initialization Type
@@ -451,8 +453,8 @@ protected:
    * @brief     check if current layer's weight decay type is l2norm
    * @return    bool is weightdecay type is L2 Norm
    */
-  bool isWeightDecayL2Norm() {
-    return weight_decay.type == WeightDecayType::l2norm;
+  bool isWeightRegularizerL2Norm() {
+    return weight_regularizer.type == WeightRegularizerType::l2norm;
   }
   /**
    * @brief     Input Tensor
@@ -492,7 +494,7 @@ protected:
 
   ActiType activation_type;
 
-  WeightDecayParam weight_decay;
+  WeightRegularizerParam weight_regularizer;
 
   WeightInitializer weight_initializer; /** initializer for weights */
 
index 02b959e..04f10cd 100644 (file)
--- a/nntrainer/include/optimizer.h
+++ b/nntrainer/include/optimizer.h
@@ -52,17 +52,19 @@ enum class OptType { sgd = 0, adam = 1, unknown = 2 };
  *            1. Regression
  *            2. Unknown (equivalent to none)
  */
-enum class WeightDecayType { l2norm = 0, regression = 1, unknown = 2 };
+enum class WeightRegularizerType { l2norm = 0, regression = 1, unknown = 2 };
 
 /**
  * @brief     type for the Weight Decay hyper-parameter
  */
-typedef struct WeightDecayParam_ {
-  WeightDecayType type;
-  float lambda;
-
-  WeightDecayParam_() : type(WeightDecayType::unknown), lambda(0.0f) {}
-} WeightDecayParam;
+typedef struct WeightRegularizerParam_ {
+  WeightRegularizerType type;
+  float constant;
+
+  WeightRegularizerParam_() :
+    type(WeightRegularizerType::unknown),
+    constant(0.0f) {}
+} WeightRegularizerParam;
 
 /**
  * @brief     type for the Optimizor to save hyper-parameter
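
On the C++ side, a minimal sketch of the renamed type flowing into the layer API shown in layer.h above; the include paths, the nntrainer namespace, and the choice of FullyConnectedLayer are assumptions, while WeightRegularizerParam, WeightRegularizerType, the constant field, and setWeightRegularizer are the names introduced here:

```cpp
/* Sketch: build the renamed parameter struct and hand it to a layer.
 * Include paths and the concrete layer type are illustrative assumptions. */
#include <fc_layer.h>
#include <optimizer.h>

void apply_l2_regularizer(nntrainer::FullyConnectedLayer &fc) {
  nntrainer::WeightRegularizerParam reg;              /* was WeightDecayParam */
  reg.type = nntrainer::WeightRegularizerType::l2norm;
  reg.constant = 0.005f;                              /* was the lambda field */

  fc.setWeightRegularizer(reg);                       /* was setWeightDecay */
}
```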
index f31dedc..76652e2 100644 (file)
--- a/nntrainer/include/parse_util.h
+++ b/nntrainer/include/parse_util.h
@@ -45,7 +45,7 @@ namespace nntrainer {
  *            3. ACTI    ( Activation Token )
  *            4. LAYER   ( Layer Token )
  *            5. WEIGHT_INIT  ( Weight Initializer Token )
- *            7. WEIGHT_DECAY  ( Weight Decay Token )
+ *            7. WEIGHT_REGULARIZER  ( Weight Regularizer Token )
  *            8. PADDING  ( Padding Token )
  *            9. POOLING  ( Pooling Token )
  *            9. UNKNOWN
@@ -57,7 +57,7 @@ typedef enum {
   TOKEN_ACTI,
   TOKEN_LAYER,
   TOKEN_WEIGHT_INIT,
-  TOKEN_WEIGHT_DECAY,
+  TOKEN_WEIGHT_REGULARIZER,
   TOKEN_PADDING,
   TOKEN_POOLING,
   TOKEN_UNKNOWN
index 1954ac8..05ff064 100644 (file)
--- a/nntrainer/src/conv2d_layer.cpp
+++ b/nntrainer/src/conv2d_layer.cpp
@@ -156,10 +156,10 @@ sharedConstTensor Conv2DLayer::forwarding(sharedConstTensor in) {
 #endif
 
   loss = 0.0f;
-  if (weight_decay.type == WeightDecayType::l2norm) {
+  if (weight_regularizer.type == WeightRegularizerType::l2norm) {
     for (unsigned int i = 0; i < filter_size; ++i) {
       Tensor &weight = paramsAt(i).weight;
-      loss += weight_decay.lambda * 0.5f * (weight.l2norm());
+      loss += weight_regularizer.constant * 0.5f * (weight.l2norm());
     }
     loss /= filter_size;
   }
@@ -337,8 +337,8 @@ sharedConstTensor Conv2DLayer::backwarding(sharedConstTensor derivative,
       Tensor &filters = paramsAt(i).weight;
 
       delK = delK.chain()
-               .applyIf(this->isWeightDecayL2Norm(), _LIFT(add_i), filters,
-                        weight_decay.lambda)
+               .applyIf(this->isWeightRegularizerL2Norm(), _LIFT(add_i),
+                        filters, weight_regularizer.constant)
                .run();
     }
 
index ebf70af..1f3fe58 100644 (file)
--- a/nntrainer/src/fc_layer.cpp
+++ b/nntrainer/src/fc_layer.cpp
@@ -83,8 +83,8 @@ sharedConstTensor FullyConnectedLayer::forwarding(sharedConstTensor in) {
   hidden = input.dot(weight);
   hidden.add_i(bias);
 
-  if (weight_decay.type == WeightDecayType::l2norm) {
-    loss = weight_decay.lambda * 0.5f * (weight.l2norm());
+  if (weight_regularizer.type == WeightRegularizerType::l2norm) {
+    loss = weight_regularizer.constant * 0.5f * (weight.l2norm());
   }
 
   return MAKE_SHARED_TENSOR(hidden);
@@ -126,8 +126,8 @@ sharedConstTensor FullyConnectedLayer::backwarding(sharedConstTensor derivative,
   djdb = derivative->sum(0);
 
   djdw = input.dot(*derivative, true, false);
-  if (isWeightDecayL2Norm())
-    djdw.add_i(weight, weight_decay.lambda);
+  if (isWeightRegularizerL2Norm())
+    djdw.add_i(weight, weight_regularizer.constant);
   djdw = djdw.sum(0);
 
   if (trainable) {
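
For reference, the term these forwarding/backwarding hunks apply is the usual L2 penalty, with λ taken from weight_regularizer_constant (this is the textbook form assuming `weight.l2norm()` returns the squared norm, which the 0.5 factor suggests):

$$ L_{reg} = \frac{\lambda}{2}\,\lVert W \rVert_2^2, \qquad \frac{\partial L_{reg}}{\partial W} = \lambda W $$

which matches `loss = weight_regularizer.constant * 0.5f * weight.l2norm()` and `djdw.add_i(weight, weight_regularizer.constant)` above; Conv2DLayer additionally averages its loss term over `filter_size`.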
index 12cbc5c..fc16e67 100644 (file)
--- a/nntrainer/src/layer.cpp
+++ b/nntrainer/src/layer.cpp
@@ -198,17 +198,18 @@ void Layer::setProperty(const PropertyType type, const std::string &value) {
       throw_status(status);
     }
     break;
-  case PropertyType::weight_decay:
+  case PropertyType::weight_regularizer:
     if (!value.empty()) {
-      weight_decay.type = (WeightDecayType)parseType(value, TOKEN_WEIGHT_DECAY);
-      if (weight_decay.type == WeightDecayType::unknown) {
+      weight_regularizer.type =
+        (WeightRegularizerType)parseType(value, TOKEN_WEIGHT_REGULARIZER);
+      if (weight_regularizer.type == WeightRegularizerType::unknown) {
         throw std::invalid_argument("[Layer] Unknown Weight decay");
       }
     }
     break;
-  case PropertyType::weight_decay_lambda:
+  case PropertyType::weight_regularizer_constant:
     if (!value.empty()) {
-      status = setFloat(weight_decay.lambda, value);
+      status = setFloat(weight_regularizer.constant, value);
       throw_status(status);
     }
     break;
@@ -265,9 +266,10 @@ void Layer::printPropertiesMeta(std::ostream &out) {
 
 void Layer::printProperties(std::ostream &out) {
   out << "Trainable: " << trainable << std::endl;
-  printIfValid(out, PropertyType::weight_decay,
-               static_cast<int>(weight_decay.type));
-  printIfValid(out, PropertyType::weight_decay_lambda, weight_decay.lambda);
+  printIfValid(out, PropertyType::weight_regularizer,
+               static_cast<int>(weight_regularizer.type));
+  printIfValid(out, PropertyType::weight_regularizer_constant,
+               weight_regularizer.constant);
 }
 
 void Layer::printMetric(std::ostream &out) {
index be71406..850e32a 100644 (file)
--- a/nntrainer/src/parse_util.cpp
+++ b/nntrainer/src/parse_util.cpp
@@ -132,7 +132,8 @@ unsigned int parseType(std::string ll, InputType t) {
    *            "L2Norm"  : squared norm regularization
    *            "Regression" : Regression
    */
-  std::array<std::string, 2> weight_decay_string = {"l2norm", "regression"};
+  std::array<std::string, 2> weight_regularizer_string = {"l2norm",
+                                                          "regression"};
 
   /**
    * @brief     Weight Decay String from configure file
@@ -210,14 +211,14 @@ unsigned int parseType(std::string ll, InputType t) {
     }
     ret = (unsigned int)WeightInitializer::WEIGHT_UNKNOWN;
     break;
-  case TOKEN_WEIGHT_DECAY:
-    for (i = 0; i < weight_decay_string.size(); i++) {
-      if (!strncasecmp(weight_decay_string[i].c_str(), ll.c_str(),
-                       weight_decay_string[i].size())) {
+  case TOKEN_WEIGHT_REGULARIZER:
+    for (i = 0; i < weight_regularizer_string.size(); i++) {
+      if (!strncasecmp(weight_regularizer_string[i].c_str(), ll.c_str(),
+                       weight_regularizer_string[i].size())) {
         return (i);
       }
     }
-    ret = (unsigned int)WeightDecayType::unknown;
+    ret = (unsigned int)WeightRegularizerType::unknown;
     break;
   case TOKEN_PADDING:
     for (i = 0; i < padding_string.size(); i++) {
@@ -253,8 +254,8 @@ unsigned int parseType(std::string ll, InputType t) {
  * standardization = 2
  * activation = 3
  * epsilon = 4
- * weight_decay = 5
- * weight_decay_lambda = 6
+ * weight_regularizer = 5
+ * weight_regularizer_constant = 6
  * unit = 7
  * weight_initializer = 8
  * bias_initializer = 9
@@ -276,28 +277,29 @@ unsigned int parseType(std::string ll, InputType t) {
  * Pooling2DLayer has 12, 13, 14, 15 properties.
  * BatchNormalizationLayer has 0, 1, 5, 6, 7 properties.
  */
-static std::array<std::string, 22> property_string = {"input_shape",
-                                                      "normalization",
-                                                      "standardization",
-                                                      "activation",
-                                                      "epsilon",
-                                                      "weight_decay",
-                                                      "weight_decay_lambda",
-                                                      "unit",
-                                                      "weight_initializer",
-                                                      "bias_initializer",
-                                                      "filters",
-                                                      "kernel_size",
-                                                      "stride",
-                                                      "padding",
-                                                      "pool_size",
-                                                      "pooling",
-                                                      "flatten",
-                                                      "name",
-                                                      "num_inputs",
-                                                      "num_outputs",
-                                                      "batch_size",
-                                                      "unknown"};
+static std::array<std::string, 22> property_string = {
+  "input_shape",
+  "normalization",
+  "standardization",
+  "activation",
+  "epsilon",
+  "weight_regularizer",
+  "weight_regularizer_constant",
+  "unit",
+  "weight_initializer",
+  "bias_initializer",
+  "filters",
+  "kernel_size",
+  "stride",
+  "padding",
+  "pool_size",
+  "pooling",
+  "flatten",
+  "name",
+  "num_inputs",
+  "num_outputs",
+  "batch_size",
+  "unknown"};
 
 unsigned int parseLayerProperty(std::string property) {
   unsigned int i;
index ca62adb..a8df7cd 100644 (file)
--- a/test/include/nntrainer_test_util.h
+++ b/test/include/nntrainer_test_util.h
@@ -186,9 +186,9 @@ const std::string config_str = "[Model]"
                                "\n"
                                "Loss = cross"
                                "\n"
-                               "Weight_Decay = l2norm"
+                               "Weight_Regularizer = l2norm"
                                "\n"
-                               "weight_Decay_Lambda = 0.005"
+                               "weight_regularizer_constant = 0.005"
                                "\n"
                                "Save_Path = 'model.bin'"
                                "\n"
@@ -249,9 +249,9 @@ const std::string config_str2 = "[Model]"
                                 "\n"
                                 "Loss = cross"
                                 "\n"
-                                "Weight_Decay = l2norm"
+                                "Weight_Regularizer = l2norm"
                                 "\n"
-                                "weight_Decay_Lambda = 0.005"
+                                "weight_regularizer_constant = 0.005"
                                 "\n"
                                 "Model = 'model.bin'"
                                 "\n"
@@ -283,9 +283,9 @@ const std::string config_str2 = "[Model]"
                                 "\n"
                                 "Activation = sigmoid"
                                 "\n"
-                                "weight_decay=l2norm"
+                                "weight_regularizer=l2norm"
                                 "\n"
-                                "weight_decay_lambda=0.005"
+                                "weight_regularizer_constant=0.005"
                                 "\n"
                                 "filters=6"
                                 "\n"
index 484bff6..6d239c9 100644 (file)
--- a/test/tizen_capi/unittest_tizen_capi.cpp
+++ b/test/tizen_capi/unittest_tizen_capi.cpp
@@ -143,7 +143,7 @@ TEST(nntrainer_capi_nnmodel, compile_05_p) {
 
   status = ml_train_layer_set_property(
     layers[1], "unit= 10", "activation=softmax", "bias_initializer=zeros",
-    "weight_decay=l2norm", "weight_decay_lambda=0.005",
+    "weight_regularizer=l2norm", "weight_regularizer_constant=0.005",
     "weight_initializer=xavier_uniform", "name=fc100", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
@@ -414,7 +414,7 @@ TEST(nntrainer_capi_nnmodel, addLayer_04_p) {
 
   status = ml_train_layer_set_property(
     layers[1], "unit= 10", "activation=softmax", "bias_initializer=zeros",
-    "weight_decay=l2norm", "weight_decay_lambda=0.005", NULL);
+    "weight_regularizer=l2norm", "weight_regularizer_constant=0.005", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_model_add_layer(model, layers[1]);
@@ -455,7 +455,7 @@ TEST(nntrainer_capi_nnmodel, addLayer_05_n) {
 
   status = ml_train_layer_set_property(
     layer, "unit= 10", "activation=softmax", "bias_initializer=zeros",
-    "weight_decay=l2norm", "weight_decay_lambda=0.005", NULL);
+    "weight_regularizer=l2norm", "weight_regularizer_constant=0.005", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_model_add_layer(model, layer);
@@ -524,7 +524,7 @@ TEST(nntrainer_capi_nnmodel, create_optimizer_02_p) {
 
   status = ml_train_layer_set_property(
     layers[1], "unit= 10", "activation=softmax", "bias_initializer=zeros",
-    "weight_decay=l2norm", "weight_decay_lambda=0.005", NULL);
+    "weight_regularizer=l2norm", "weight_regularizer_constant=0.005", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_model_add_layer(model, layers[1]);
@@ -574,7 +574,7 @@ TEST(nntrainer_capi_nnmodel, create_optimizer_03_p) {
 
   status = ml_train_layer_set_property(
     layers[1], "unit= 10", "activation=softmax", "bias_initializer=zeros",
-    "weight_decay=l2norm", "weight_decay_lambda=0.005", NULL);
+    "weight_regularizer=l2norm", "weight_regularizer_constant=0.005", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_model_add_layer(model, layers[1]);
@@ -628,7 +628,7 @@ TEST(nntrainer_capi_nnmodel, train_with_file_01_p) {
 
   status = ml_train_layer_set_property(
     layers[1], "unit= 10", "activation=softmax", "bias_initializer=zeros",
-    "weight_decay=l2norm", "weight_decay_lambda=0.005",
+    "weight_regularizer=l2norm", "weight_regularizer_constant=0.005",
     "weight_initializer=xavier_uniform", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
@@ -697,7 +697,7 @@ TEST(nntrainer_capi_nnmodel, train_with_generator_01_p) {
 
   status = ml_train_layer_set_property(
     layers[1], "unit= 10", "activation=softmax", "bias_initializer=zeros",
-    "weight_decay=l2norm", "weight_decay_lambda=0.005",
+    "weight_regularizer=l2norm", "weight_regularizer_constant=0.005",
     "weight_initializer=xavier_uniform", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
index 6e2dcc5..faba714 100644 (file)
--- a/test/tizen_capi/unittest_tizen_capi_layer.cpp
+++ b/test/tizen_capi/unittest_tizen_capi_layer.cpp
@@ -177,8 +177,9 @@ TEST(nntrainer_capi_nnlayer, setproperty_08_p) {
   int status;
   status = ml_train_layer_create(&handle, ML_TRAIN_LAYER_TYPE_FC);
   EXPECT_EQ(status, ML_ERROR_NONE);
-  status = ml_train_layer_set_property(handle, "weight_decay=l2norm",
-                                       "weight_decay_lambda=0.0001", NULL);
+  status =
+    ml_train_layer_set_property(handle, "weight_regularizer=l2norm",
+                                "weight_regularizer_constant=0.0001", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
   status = ml_train_layer_destroy(handle);
   EXPECT_EQ(status, ML_ERROR_NONE);
@@ -192,8 +193,9 @@ TEST(nntrainer_capi_nnlayer, setproperty_09_n) {
   int status;
   status = ml_train_layer_create(&handle, ML_TRAIN_LAYER_TYPE_FC);
   EXPECT_EQ(status, ML_ERROR_NONE);
-  status = ml_train_layer_set_property(handle, "weight_decay=asdfasd",
-                                       "weight_decay_lambda=0.0001", NULL);
+  status =
+    ml_train_layer_set_property(handle, "weight_regularizer=asdfasd",
+                                "weight_regularizer_constant=0.0001", NULL);
   EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER);
   status = ml_train_layer_destroy(handle);
   EXPECT_EQ(status, ML_ERROR_NONE);
index 24e9042..250e1a2 100644 (file)
--- a/test/unittest/unittest_nntrainer_layers.cpp
+++ b/test/unittest/unittest_nntrainer_layers.cpp
@@ -875,8 +875,8 @@ protected:
       setProperty("input_shape=3:28:28 | batch_size=32 |"
                   "bias_initializer=zeros |"
                   "activation=sigmoid |"
-                  "weight_decay=l2norm |"
-                  "weight_decay_lambda= 0.005 |"
+                  "weight_regularizer=l2norm |"
+                  "weight_regularizer_constant= 0.005 |"
                   "weight_initializer=xavier_uniform |"
                   "normalization=true |"
                   "filters=12 | kernel_size= 5,5 | stride=3,3 | padding=1,1");