// Commit: [layer] Support multiple outputs for mol attention layer
// File: test/unittest/unittest_nntrainer_models.cpp (platform/core/ml/nntrainer.git)
1 // SPDX-License-Identifier: Apache-2.0
2 /**
3  * Copyright (C) 2020 Jihoon Lee <jhoon.it.lee@samsung.com>
4  *
5  * @file   unittest_nntrainer_models.cpp
6  * @date   19 Oct 2020
 * @brief  Model multi iteration, integrated test
8  * @see    https://github.com/nnstreamer/nntrainer
9  * @author Jihoon Lee <jhoon.it.lee@samsung.com>
10  * @bug    No known bugs except for NYI items
11  *
12  */
13 #include <algorithm>
14 #include <fstream>
15 #include <iostream>
16 #include <memory>
17 #include <vector>
18
19 #include <gtest/gtest.h>
20
21 #include <input_layer.h>
22 #include <layer.h>
23 #include <neuralnet.h>
24
25 #include <models_golden_test.h>
26
// Base ini sections / property strings shared by every model testcase below.
// Each `*_base` string is a '|'-separated property list consumed by
// nntrainer::IniSection; operator+ appends further properties.
static nntrainer::IniSection nn_base("model", "type = NeuralNetwork");
static std::string input_base = "type = input";
static std::string fc_base = "type = Fully_connected";
static std::string conv_base = "type = conv2d | stride = 1,1 | padding = 0,0";
static std::string rnn_base = "type = rnn";
static std::string lstm_base = "type = lstm";
static std::string gru_base = "type = gru";
static std::string pooling_base = "type = pooling2d | padding = 0,0";
static std::string preprocess_flip_base = "type = preprocess_flip";
static std::string preprocess_translate_base = "type = preprocess_translate";
static std::string mse_base = "type = mse";
static std::string cross_base = "type = cross";
static std::string cross_softmax_base = "type = cross_softmax";

// Adam optimizer defaults for testcases that need adam instead of sgd.
static std::string adam_base = "optimizer=adam | beta1 = 0.9 | beta2 = 0.999 | "
                               "epsilon = 1e-7";

// Frequently reused activation / normalization / optimizer sections.
static nntrainer::IniSection act_base("activation", "Type = Activation");
static nntrainer::IniSection softmax_base = act_base + "Activation = softmax";
static nntrainer::IniSection sigmoid_base = act_base + "Activation = sigmoid";
static nntrainer::IniSection relu_base = act_base + "Activation = relu";
static nntrainer::IniSection bn_base("bn", "Type=batch_normalization");
static nntrainer::IniSection sgd_base("optimizer", "Type = sgd");

// Short aliases to keep the model definitions below readable.
using I = nntrainer::IniSection;
using INI = nntrainer::IniWrapper;
53
54 /**
55  * This is just a wrapper for an ini file with save / erase attached.
56  * for example, fc_softmax_mse contains following ini file representation as a
57  * series of IniSection
58  *
59  * [model]
60  * Type = NeuralNetwork
61  * Learning_rate = 1
62  * Optimizer = sgd
63  * Loss = mse
64  * batch_Size = 3
65  *
66  * [input_1]
67  * Type = input
68  * Input_Shape = 1:1:3
69  *
70  * [dense]
71  * Type = fully_connected
72  * Unit = 5
73  *
74  * [activation]
75  * Type = Activation
76  * Activation = softmax
77  *
78  * [dense]
79  * Type = fully_connected
80  * Unit = 10
81  *
82  * [activation]
83  * Type = Activation
84  * Activation = softmax
85  */
86 // clang-format off
87
88 // TODO: update some models to use loss at the end as a layer
89 // and check for all cases
90
// Fully-connected baseline: input -> fc(5) -> sigmoid -> fc(10).
// Loss / final activation are appended per-testcase below.
INI fc_sigmoid_baseline(
  "fc_sigmoid",
  {nn_base + "batch_size = 3",
   sgd_base + "learning_rate = 1",
   I("input") + input_base + "input_shape = 1:1:3",
   I("dense") + fc_base + "unit = 5",
   I("act") + sigmoid_base,
   I("dense_1") + fc_base + "unit = 10"});

// mse loss set as a model property ...
INI fc_sigmoid_mse =
  INI("fc_sigmoid_mse") + fc_sigmoid_baseline + softmax_base + "model/loss=mse";

// ... versus mse loss appended as a dedicated loss layer (same model).
INI fc_sigmoid_mse__1 =
  INI("fc_sigmoid_mse__1") + fc_sigmoid_baseline + softmax_base +  I("loss", mse_base);

// Same baseline, but with gradient clipping effectively disabled (norm 0.0).
INI fc_sigmoid_baseline_clipped_at_0(
  "fc_sigmoid",
  {nn_base + "batch_size = 3",
   sgd_base + "learning_rate = 1",
   I("input") + input_base + "input_shape = 1:1:3",
   I("dense") + fc_base + "unit = 5" + "clip_grad_by_norm = 0.0",
   I("act") + sigmoid_base,
   I("dense_1") + fc_base + "unit = 10" + "clip_grad_by_norm = 0.0"});

INI fc_sigmoid_mse__2 =
  INI("fc_sigmoid_mse__2") + fc_sigmoid_baseline_clipped_at_0 + softmax_base +  I("loss", mse_base);

// Same baseline, clip threshold so high it should never trigger.
INI fc_sigmoid_baseline_clipped_too_high(
  "fc_sigmoid",
  {nn_base + "batch_size = 3",
   sgd_base + "learning_rate = 1",
   I("input") + input_base + "input_shape = 1:1:3",
   I("dense") + fc_base + "unit = 5" + "clip_grad_by_norm = 10000.0",
   I("act") + sigmoid_base,
   I("dense_1") + fc_base + "unit = 10" + "clip_grad_by_norm = 10000.0"});

INI fc_sigmoid_mse__3 =
  INI("fc_sigmoid_mse__3") + fc_sigmoid_baseline_clipped_too_high + softmax_base +  I("loss", mse_base);

INI fc_sigmoid_cross =
  INI("fc_sigmoid_cross") + fc_sigmoid_baseline + softmax_base + "model/loss=cross";

INI fc_sigmoid_cross__1 =
  INI("fc_sigmoid_cross__1") + fc_sigmoid_baseline + I("loss", cross_softmax_base);

// Relu baseline: input -> fc(10) -> relu -> fc(2) -> sigmoid.
INI fc_relu_baseline(
  "fc_relu",
  {nn_base + "Loss=mse | batch_size = 3",
   sgd_base + "learning_rate = 0.1",
   I("input") + input_base + "input_shape = 1:1:3",
   I("dense") + fc_base + "unit = 10",
   I("act") + relu_base,
   I("dense_1") + fc_base + "unit = 2",
   // NOTE(review): input_layers is set twice; presumably the later value
   // (dense_1) overrides — confirm against IniSection duplicate-key behavior.
   I("act_1") + sigmoid_base + "input_layers=dense" + "input_layers=dense_1"});

INI fc_relu_mse =
  INI("fc_relu_mse") + fc_relu_baseline + "model/loss=mse";

INI fc_relu_mse__1 =
  INI("fc_relu_mse__1") + fc_relu_baseline + I("loss", mse_base);

// Overrides the "act" section's activation in-place via section/key syntax.
INI fc_leaky_relu_mse = INI("fc_relu_leaky_relu") + fc_relu_baseline + "act/activation=leaky_relu";
153
// Fully-connected model with batch normalization between dense and sigmoid.
INI fc_bn_sigmoid_cross(
  "fc_bn_sigmoid_cross",
  {nn_base + "loss=cross | batch_size = 3",
   sgd_base + "learning_rate = 1",
   I("input") + input_base + "input_shape = 1:1:3",
   I("dense") + fc_base + "unit = 10" + "input_layers=input",
   I("bn") + bn_base + "input_layers=dense",
   I("act") + sigmoid_base + "input_layers=bn",
   I("dense_2") + fc_base + "unit = 10" + "input_layers=act",
   I("act_3") + softmax_base + "input_layers=dense_2"});

INI fc_bn_sigmoid_mse =
  INI("fc_bn_sigmoid_mse") + fc_bn_sigmoid_cross + "model/loss=mse";

// 2x2 average pooling block reused by the mnist-style conv models below.
std::string mnist_pooling =
  pooling_base + "| pool_size=2,2 | stride=2,2 | pooling=average | padding=0,0";
170
// Mnist-style model: conv -> sigmoid -> avg pool -> flatten -> fc -> softmax.
INI mnist_conv_cross(
  "mnist_conv_cross",
  {
    nn_base + "loss=cross | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:4:5",
    I("conv2d_c1_layer") + conv_base + "kernel_size=3,4 | filters=2" +"input_layers=input",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1_layer",
    I("pool_1") + mnist_pooling+"input_layers=act_1",
    I("flatten", "type=flatten")+"input_layers=pool_1" ,
    I("outputlayer") + fc_base + "unit = 10" +"input_layers=flatten",
    I("act_3") + softmax_base +"input_layers=outputlayer"
  }
);

// 1x1 (pointwise) convolution case.
INI conv_1x1(
  "conv_1x1",
  {
    nn_base + "loss=cross | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:4:5",
    I("conv2d_c1_layer") + conv_base + "kernel_size=1,1 | filters=4",
    I("act_1") + sigmoid_base,
    I("flatten", "type=flatten") ,
    I("outputlayer") + fc_base + "unit = 10",
    I("act_2") + softmax_base
  }
);

// Kernel exactly matching input spatial size (4x5), i.e. 1x1 output map.
INI conv_input_matches_kernel(
  "conv_input_matches_kernel",
  {
    nn_base + "loss=cross | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:4:5",
    I("conv2d_c1_layer") + conv_base + "kernel_size=4,5 | filters=4" +"input_layers=input",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1_layer",
    I("flatten", "type=flatten")+"input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" +"input_layers=flatten",
    I("act_2") + softmax_base +"input_layers=outputlayer"
  }
);

// Plain 3x3 convolution with default stride/padding.
INI conv_basic(
  "conv_basic",
  {
    nn_base + "loss=cross | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:5:3",
    I("conv2d_c1") + conv_base +
            "kernel_size = 3,3 | filters=4" + "input_layers=input",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1",
    I("flatten", "type=flatten")+"input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base +"input_layers=outputlayer"
  }
);
228
// Convolution variants exercising padding / stride combinations.
INI conv_same_padding(
  "conv_same_padding",
  {
    nn_base + "loss=cross | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:5:3",
    I("conv2d_c1") + conv_base +
            "kernel_size = 3,3 | filters=4 | padding =same" + "input_layers=input",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1",
    I("flatten", "type=flatten")+"input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base +"input_layers=outputlayer"
  }
);

INI conv_multi_stride(
  "conv_multi_stride",
  {
    nn_base + "loss=cross | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:5:3",
    I("conv2d_c1") + conv_base +
            "kernel_size = 3,3 | filters=4 | stride=2,2" + "input_layers=input",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1",
    I("flatten", "type=flatten")+"input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base +"input_layers=outputlayer"
  }
);

// Stride larger than what evenly tiles the input.
INI conv_uneven_strides(
  "conv_uneven_strides",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
        I("input") + input_base + "input_shape=2:5:3",
    I("conv2d_c1") + conv_base +
            "kernel_size = 3,3 | filters=4 | stride=3,3" + "input_layers=input",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1",
    I("flatten", "type=flatten")+"input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base +"input_layers=outputlayer"
  }
);

// Asymmetric strides: 1,2 ...
INI conv_uneven_strides2(
  "conv_uneven_strides2",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
    I("input") + input_base + "input_shape=2:4:4",
    I("conv2d_c1") + conv_base + "kernel_size = 2,2 | filters=2 | stride=1,2",
    I("act_1") + sigmoid_base,
    I("flatten", "type=flatten"),
    I("outputlayer") + fc_base + "unit = 10",
    I("act_2") + softmax_base
  }
);

// ... and 2,1 (mirror of the case above).
INI conv_uneven_strides3(
  "conv_uneven_strides3",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
    I("input") + input_base + "input_shape=2:4:4",
    I("conv2d_c1") + conv_base + "kernel_size = 2,2 | filters=2 | stride=2,1",
    I("act_1") + sigmoid_base,
    I("flatten", "type=flatten"),
    I("outputlayer") + fc_base + "unit = 10",
    I("act_2") + softmax_base
  }
);

// Convolution followed by batch normalization.
INI conv_bn(
  "conv_bn",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
    I("input_layer") + input_base + "input_shape=2:3:5",
    I("conv2d_c1") + conv_base + "kernel_size = 2,2 | filters=2",
    I("bn") + bn_base,
    I("act_1") + relu_base,
    I("flatten", "type=flatten"),
    I("outputlayer") + fc_base + "unit = 10",
    I("act_2") + softmax_base
  }
);

INI conv_same_padding_multi_stride(
  "conv_same_padding_multi_stride",
  {
    nn_base + "loss=cross | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:5:3",
    I("conv2d_c1") + conv_base +
            "kernel_size = 3,3 | filters=4 | stride=2,2 | padding=same" + "input_layers=input",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1",
    I("flatten", "type=flatten")+"input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base +"input_layers=outputlayer"
  }
);

// Model intentionally declared without a loss property.
INI conv_no_loss(
  "conv_no_loss",
  {
    nn_base + "batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:4:5",
    I("conv2d_c1_layer") + conv_base + "kernel_size=4,5 | filters=4" +"input_layers=input",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1_layer",
    I("flatten", "type=flatten")+"input_layers=act_1" ,
    I("outputlayer") + fc_base + "unit = 10" +"input_layers=flatten",
    I("act_2") + softmax_base +"input_layers=outputlayer"
  }
);
341
// Pooling variants: max/average x same/valid/explicit padding x strides,
// plus global pooling. All share the pool -> sigmoid -> flatten -> fc head.
INI pooling_max_same_padding(
  "pooling_max_same_padding",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
        I("input") + input_base + "input_shape=2:5:3",
    I("pooling_1") + pooling_base +
            "pooling=max | pool_size = 3,3 | padding =same" + "input_layers=input",
    I("act_1") + sigmoid_base + "input_layers=pooling_1",
    I("flatten", "type=flatten")+ "input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base + "input_layers=outputlayer"
  }
);

INI pooling_max_same_padding_multi_stride(
  "pooling_max_same_padding_multi_stride",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
        I("input") + input_base + "input_shape=2:3:5",
    I("pooling_1") + pooling_base +
            "pooling=max | pool_size = 3,3 | padding =1 | stride=2,2" + "input_layers=input",
    I("act_1") + sigmoid_base + "input_layers=pooling_1",
    I("flatten", "type=flatten")+ "input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base + "input_layers=outputlayer"
  }
);

INI pooling_max_valid_padding(
  "pooling_max_valid_padding",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
        I("input") + input_base + "input_shape=2:5:3",
    I("pooling_1") + pooling_base +
            "pooling=max | pool_size = 3,3 | padding =valid" + "input_layers=input",
    I("act_1") + sigmoid_base + "input_layers=pooling_1",
    I("flatten", "type=flatten")+ "input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base + "input_layers=outputlayer"
  }
);

INI pooling_avg_same_padding(
  "pooling_avg_same_padding",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
        I("input") + input_base + "input_shape=2:5:3",
    I("pooling_1") + pooling_base +
            "pooling=average | pool_size = 3,3 | padding =1,1,1,1" + "input_layers=input",
    I("act_1") + sigmoid_base + "input_layers=pooling_1",
    I("flatten", "type=flatten")+ "input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base + "input_layers=outputlayer"
  }
);

INI pooling_avg_valid_padding(
  "pooling_avg_valid_padding",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
        I("input") + input_base + "input_shape=2:5:3",
    I("pooling_1") + pooling_base +
            "pooling=average | pool_size = 3,3 | padding =valid" + "input_layers=input",
    I("act_1") + sigmoid_base + "input_layers=pooling_1",
    I("flatten", "type=flatten")+ "input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base + "input_layers=outputlayer"
  }
);

INI pooling_avg_same_padding_multi_stride(
  "pooling_avg_same_padding_multi_stride",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
        I("input") + input_base + "input_shape=2:3:5",
    I("pooling_1") + pooling_base +
            "pooling=average | pool_size = 3,3 | padding =same | stride=2,2" + "input_layers=input",
    I("act_1") + sigmoid_base + "input_layers=pooling_1",
    I("flatten", "type=flatten")+ "input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base + "input_layers=outputlayer"
  }
);

INI pooling_global_avg(
  "pooling_global_avg",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
        I("input") + input_base + "input_shape=2:5:3",
    I("pooling_1") + pooling_base +
            "pooling=global_average" + "input_layers=input",
    I("act_1") + sigmoid_base + "input_layers=pooling_1",
    I("flatten", "type=flatten")+ "input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base + "input_layers=outputlayer"
  }
);

INI pooling_global_max(
  "pooling_global_max",
  {
    nn_base + "learning_rate=0.1 | optimizer=sgd | loss=cross | batch_size=3",
        I("input") + input_base + "input_shape=2:5:3",
    I("pooling_1") + pooling_base +
            "pooling=global_max" + "input_layers=input",
    I("act_1") + sigmoid_base + "input_layers=pooling_1",
    I("flatten", "type=flatten")+ "input_layers=act_1",
    I("outputlayer") + fc_base + "unit = 10" + "input_layers=flatten",
    I("act_2") + softmax_base + "input_layers=outputlayer"
  }
);
453
// Preprocess layers inserted between input and the mnist-style conv stack.
INI preprocess_flip_validate(
  "preprocess_flip_validate",
  {
    nn_base + "loss=cross | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:4:5",
    I("preprocess_flip") + preprocess_flip_base +
            "flip_direction=vertical" + "input_layers=input",
    I("conv2d_c1_layer") + conv_base + "kernel_size=3,4 | filters=2" +"input_layers=preprocess_flip",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1_layer",
    I("pool_1") + mnist_pooling+"input_layers=act_1",
    I("flatten", "type=flatten")+"input_layers=pool_1" ,
    I("outputlayer") + fc_base + "unit = 10" +"input_layers=flatten",
    I("act_3") + softmax_base +"input_layers=outputlayer"
  }
);

INI preprocess_translate(
  "preprocess_translate",
  {
    nn_base + "loss=cross | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=2:4:5",
    I("preprocess_translate") + preprocess_translate_base +
            "random_translate=0.5" + "input_layers=input",
    I("conv2d_c1_layer") + conv_base + "kernel_size=3,4 | filters=2" +"input_layers=preprocess_translate",
    I("act_1") + sigmoid_base +"input_layers=conv2d_c1_layer",
    I("pool_1") + mnist_pooling+"input_layers=act_1",
    I("flatten", "type=flatten")+"input_layers=pool_1" ,
    I("outputlayer") + fc_base + "unit = 10" +"input_layers=flatten",
    I("act_3") + softmax_base +"input_layers=outputlayer"
  }
);

// Same conv model with batch size overridden to 1.
INI mnist_conv_cross_one_input = INI("mnist_conv_cross_one_input") + mnist_conv_cross + "model/batch_size=1";

// Time-distributed fully-connected models (distribute=true applies the fc
// layer over the time axis of the 1:5:5 input).
INI fc_softmax_mse_distribute(
  "fc_softmax_mse_distribute",
  {
    nn_base + "loss=mse | batch_size = 3",
    sgd_base + "learning_rate = 1",
    I("input") + input_base + "input_shape = 1:5:5",
    I("dense") + fc_base + "unit = 3"+"activation=softmax"+"distribute=true"
  }
);

INI fc_softmax_cross_distribute(
  "fc_softmax_cross_distribute",
  {
    nn_base + "loss=cross | batch_size = 3",
    sgd_base + "learning_rate = 1",
    I("input") + input_base + "input_shape = 1:5:5",
    I("dense") + fc_base + "unit = 3"+"activation=softmax"+"distribute=true"
  }
);

INI fc_sigmoid_cross_distribute(
  "fc_sigmoid_cross_distribute",
  {
    nn_base + "loss=cross | batch_size = 3",
    sgd_base + "learning_rate = 1",
    I("input") + input_base + "input_shape = 1:5:5",
    I("dense") + fc_base + "unit = 3"+"activation=sigmoid"+"distribute=true"
  }
);
519
// Resnet-style two-branch model: conv path (a1-a3) plus 1x1 shortcut (b1),
// merged by an addition layer and fed into the classifier head.
INI addition_resnet_like(
  "addition_resnet_like",
  {
    nn_base + "loss=mse | batch_size = 3",
    sgd_base + "learning_rate = 0.1",
    I("x") + input_base + "input_shape = 2:3:5",
    I("addition_a1") + conv_base
      + "filters=4 | kernel_size=3,3 | stride=2,2 | padding=1,1",
    I("addition_a2") + relu_base,
    I("addition_a3") + conv_base + "filters=4 | kernel_size=3,3 | padding=1,1",
    // shortcut branch starts again from "x"
    I("addition_b1") + conv_base
      + "filters=4 | kernel_size=1,1 | stride=2,2"
      + "input_layers=x",
    I("addition_c1", "type=addition | input_layers=addition_a3, addition_b1"),
    I("addition_c2", "type=flatten"),
    I("addition_c3") + fc_base + "unit=10",
    I("addition_c4") + softmax_base,
  }
);
539
// Recurrent model testcases: lstm / rnn / gru, each exercised as a single
// cell, with return_sequences, with batch > 1, and stacked (multi_*).
INI lstm_basic(
  "lstm_basic",
  {
    nn_base + "loss=mse | batch_size=1",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:1:1",
    I("lstm") + lstm_base +
      "unit = 1" + "input_layers=input",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=lstm"
  }
);

INI lstm_return_sequence(
  "lstm_return_sequence",
  {
    nn_base + "loss=mse | batch_size=1",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("lstm") + lstm_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=lstm"
  }
);

INI lstm_return_sequence_with_batch(
  "lstm_return_sequence_with_batch",
  {
    nn_base + "loss=mse | batch_size=2",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("lstm") + lstm_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=lstm"
  }
);
INI rnn_basic(
  "rnn_basic",
  {
    nn_base + "loss=mse | batch_size=1",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:1:1",
    I("rnn") + rnn_base +
      "unit = 2" + "input_layers=input",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
  }
);

INI rnn_return_sequences(
  "rnn_return_sequences",
  {
    nn_base + "loss=mse | batch_size=1",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("rnn") + rnn_base +
      "unit = 2" + "input_layers=input" + "return_sequences=true",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
  }
);

INI multi_lstm_return_sequence(
  "multi_lstm_return_sequence",
  {
    nn_base + "loss=mse | batch_size=1",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("lstm") + lstm_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("lstm2") + lstm_base +
      "unit = 2" + "input_layers=lstm",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=lstm2"
  }
);

INI multi_lstm_return_sequence_with_batch(
  "multi_lstm_return_sequence_with_batch",
  {
    nn_base + "loss=mse | batch_size=2",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("lstm") + lstm_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("lstm2") + lstm_base +
      "unit = 2" + "input_layers=lstm",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=lstm2"
  }
);

INI rnn_return_sequence_with_batch(
  "rnn_return_sequence_with_batch",
  {
    nn_base + "loss=mse | batch_size=2",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("rnn") + rnn_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
  }
);

INI multi_rnn_return_sequence(
  "multi_rnn_return_sequence",
  {
    nn_base + "loss=mse | batch_size=1",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("rnn") + rnn_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("rnn2") + rnn_base +
      "unit = 2" + "input_layers=rnn",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn2"
  }
);

INI multi_rnn_return_sequence_with_batch(
  "multi_rnn_return_sequence_with_batch",
  {
    nn_base + "loss=mse | batch_size=2",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("rnn") + rnn_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("rnn2") + rnn_base +
      "unit = 2" + "input_layers=rnn",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn2"
  }
);

INI gru_basic(
  "gru_basic",
  {
    nn_base + "loss=mse | batch_size=1",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:1:1",
    I("gru") + gru_base +
      "unit = 1" + "input_layers=input",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru"
  }
);

INI gru_return_sequence(
  "gru_return_sequence",
  {
    nn_base + "loss=mse | batch_size=1",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("gru") + gru_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru"
  }
);

INI gru_return_sequence_with_batch(
  "gru_return_sequence_with_batch",
  {
    nn_base + "loss=mse | batch_size=2",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("gru") + gru_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru"
  }
);

INI multi_gru_return_sequence(
  "multi_gru_return_sequence",
  {
    nn_base + "loss=mse | batch_size=1",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("gru") + gru_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("gru2") + gru_base +
      "unit = 2" + "input_layers=gru",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru2"
  }
);

INI multi_gru_return_sequence_with_batch(
  "multi_gru_return_sequence_with_batch",
  {
    nn_base + "loss=mse | batch_size=2",
    sgd_base + "learning_rate = 0.1",
    I("input") + input_base + "input_shape=1:2:1",
    I("gru") + gru_base +
      "unit = 2" + "input_layers=input"+ "return_sequences=true",
    I("gru2") + gru_base +
      "unit = 2" + "input_layers=gru",
    I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru2"
  }
);
730
// Model whose input "x" fans out to two independent heads (a* and b*),
// i.e. the input layer itself has multiple output consumers.
INI multiple_output_model(
  "multiple_output_model",
  {
    nn_base + "loss=mse | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("x") + input_base + "input_shape = 2:3:5",
    I("multiout_a1") + conv_base
      + "filters=4 | kernel_size=3,3 | stride=2,2 | padding=1,1",
    I("multiout_a2") + relu_base,
    I("multiout_a3") + conv_base + "filters=4 | kernel_size=3,3 | padding=1,1",
    I("multiout_a4", "type=flatten"),
    I("multiout_a5") + fc_base + "unit=10",
    I("multiout_a6") + softmax_base,
    // second head branches off the shared input "x"
    I("multiout_b1") + conv_base
      + "filters=4 | kernel_size=1,1 | stride=2,2"
      + "input_layers=x",
    I("multiout_b2", "type=flatten"),
    I("multiout_b3") + fc_base + "unit=10",
    I("multiout_b4") + softmax_base
  }
);

// "fc" output is consumed by both fc1 and fc2, then re-joined by addition.
INI multiout_model(
  "multiout_model",
  {
    nn_base + "loss=mse | batch_size=3",
    sgd_base + "learning_rate = 0.1",
    I("x") + input_base + "input_shape = 1:10",
    I("fc") + fc_base + "unit = 2",
    I("fc1") + fc_base
      + "unit=2 | input_layers=fc",
    I("fc2") + fc_base
      + "unit=2 | input_layers=fc",
    I("add1", "type=addition | input_layers=fc1, fc2"),
    I("fc3") + fc_base + "unit=3",
    I("sm") + softmax_base
  }
);
769
770 /**
771  * @brief helper function to make model testcase
772  *
773  * @param nntrainer::TensorDim label dimension
774  * @param int Iteration
775  * @param options options
776  */
777 auto mkResNet18Tc(const unsigned int iteration,
778                ModelTestOption options = ModelTestOption::ALL) {
779   unsigned int batch_size = 2;
780   unsigned int num_class = 100;
781   unsigned int count = 0;
782   nntrainer::IniWrapper::Sections layers;
783
784   /** get unique name for a layer */
785   auto getName = [&count]() -> std::string {
786     if (count == 21)
787       std::cout << "mimatch" << std::endl;
788     return "layer" + std::to_string(++count);
789     };
790   auto getPreviousName = [&count]() -> std::string { return "layer" + std::to_string(count); };
791
792   /** add blocks */
793   auto addBlock = [&layers, &getName, &getPreviousName] (
794     unsigned int filters, unsigned int kernel_size, bool downsample) {
795     std::string filter_str = "filters=" + std::to_string(filters);
796     std::string kernel_str = "kernel_size=" + std::to_string(kernel_size) + "," + std::to_string(kernel_size);
797     std::string kernel1_str = "kernel_size=1,1";
798     std::string stride1_str = "stride=1,1";
799     std::string stride2_str = "stride=2,2";
800     std::string padding_str = "padding=same";
801     std::string input_name = getPreviousName();
802     std::string in_layer_str = "input_layers=" + input_name;
803     std::string stride_str = stride1_str;
804     if (downsample)
805       stride_str = stride2_str;
806
807     /** skip connection */
808     std::string b1_name = input_name;
809     if (downsample) {
810       b1_name = getName();
811       layers.push_back(I(b1_name) + conv_base + filter_str +
812       kernel1_str + stride_str + padding_str + in_layer_str);
813     }
814
815     /** main connection */
816     layers.push_back(I(getName()) + conv_base + filter_str +
817     kernel_str + stride_str + padding_str + in_layer_str);
818     layers.push_back(I(getName()) + bn_base);
819     layers.push_back(I(getName()) + relu_base);
820     std::string a1_name = getName();
821     layers.push_back(I(a1_name) + conv_base + filter_str +
822     kernel_str + stride1_str + padding_str);
823
824     /** add the two connections */
825     layers.push_back(I(getName()) + "type=addition" + ("input_layers=" + b1_name + "," + a1_name));
826     layers.push_back(I(getName()) + bn_base);
827     layers.push_back(I(getName()) + relu_base);
828   };
829
830   layers.push_back(nn_base + ("loss=cross | batch_size = " + std::to_string(batch_size)));
831   layers.push_back(sgd_base + "learning_rate = 0.1");
832   /** prefix for resnet model */
833   layers.push_back(I(getName()) + input_base + "input_shape = 3:32:32");
834   layers.push_back(I(getName()) + conv_base + "kernel_size=3,3 | filters=64 | padding=same");
835   layers.push_back(I(getName()) + bn_base);
836   layers.push_back(I(getName()) + relu_base);
837   /** add all the blocks */
838   addBlock(64, 3, false);
839   addBlock(64, 3, false);
840   addBlock(128, 3, true);
841   addBlock(128, 3, false);
842   addBlock(256, 3, true);
843   addBlock(256, 3, false);
844   addBlock(512, 3, true);
845   addBlock(512, 3, false);
846   /** add suffix for resnet model */
847   layers.push_back(I(getName()) + pooling_base + "pooling = average | pool_size=4,4");
848   layers.push_back(I(getName()) + "type=flatten");
849   layers.push_back(I(getName()) + fc_base + "unit=100");
850   layers.push_back(I(getName()) + softmax_base);
851
852   return std::tuple<const nntrainer::IniWrapper, const nntrainer::TensorDim,
853                     const unsigned int, ModelTestOption>(
854     nntrainer::IniWrapper("ResNet18", layers), nntrainer::TensorDim({batch_size, 1,1, num_class}), iteration, options);
855 }
856
/**
 * Parameterized model golden tests. Each mkModelIniTc entry builds a model
 * from the referenced ini description, runs it with the given label
 * dimension and iteration count, and verifies it per the ModelTestOption
 * (e.g. COMPARE against golden data, NO_THROW_RUN smoke run, ALL).
 */
INSTANTIATE_TEST_CASE_P(
  nntrainerModelAutoTests, nntrainerModelTest, ::testing::ValuesIn(
    {
      mkModelIniTc(fc_sigmoid_mse, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(fc_sigmoid_mse__1, "3:1:1:10", 1, ModelTestOption::ALL),
      mkModelIniTc(fc_sigmoid_mse__2, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(fc_sigmoid_mse__3, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(fc_sigmoid_cross, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(fc_sigmoid_cross__1, "3:1:1:10", 1, ModelTestOption::ALL),
      mkModelIniTc(fc_relu_mse, "3:1:1:2", 10, ModelTestOption::ALL),
      mkModelIniTc(fc_leaky_relu_mse, "3:1:1:2", 10, ModelTestOption::SAVE_AND_LOAD_INI),
      mkModelIniTc(fc_relu_mse__1, "3:1:1:2", 1, ModelTestOption::ALL),
      /// @todo bn with custom initializer
      mkModelIniTc(fc_bn_sigmoid_cross, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(fc_bn_sigmoid_mse, "3:1:1:10", 10, ModelTestOption::ALL),

      /**< single conv2d layer test */
      mkModelIniTc(conv_1x1, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_input_matches_kernel, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_basic, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_same_padding, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_multi_stride, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_uneven_strides, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_uneven_strides2, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_uneven_strides3, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_bn, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_same_padding_multi_stride, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(conv_no_loss, "3:1:1:10", 1, ModelTestOption::NO_THROW_RUN),

      /**< single pooling layer test */
      mkModelIniTc(pooling_max_same_padding, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(pooling_max_same_padding_multi_stride, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(pooling_max_valid_padding, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(pooling_avg_same_padding, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(pooling_avg_same_padding_multi_stride, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(pooling_avg_valid_padding, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(pooling_global_avg, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(pooling_global_max, "3:1:1:10", 10, ModelTestOption::ALL),

      /**< conv pool combined tests */
      mkModelIniTc(mnist_conv_cross, "3:1:1:10", 10, ModelTestOption::ALL),
      mkModelIniTc(mnist_conv_cross_one_input, "1:1:1:10", 10, ModelTestOption::ALL),

      /**< augmentation layer */
  #if defined(ENABLE_DATA_AUGMENTATION_OPENCV)
      mkModelIniTc(preprocess_translate, "3:1:1:10", 10, ModelTestOption::NO_THROW_RUN),
  #endif
      mkModelIniTc(preprocess_flip_validate, "3:1:1:10", 10, ModelTestOption::NO_THROW_RUN),

      /**< Addition test */
      mkModelIniTc(addition_resnet_like, "3:1:1:10", 10, ModelTestOption::COMPARE), // Todo: Enable option to ALL

      /** Multiout test */
      mkModelIniTc(multiout_model, "3:1:1:3", 10, ModelTestOption::COMPARE), // Todo: Enable option to ALL

      /// #1192 time distribution inference bug
      mkModelIniTc(fc_softmax_mse_distribute, "3:1:5:3", 1, ModelTestOption::NO_THROW_RUN),
      mkModelIniTc(fc_softmax_cross_distribute, "3:1:5:3", 1, ModelTestOption::NO_THROW_RUN),
      mkModelIniTc(fc_sigmoid_cross_distribute, "3:1:5:3", 1, ModelTestOption::NO_THROW_RUN),
      mkModelIniTc(lstm_basic, "1:1:1:1", 10, ModelTestOption::ALL),
      mkModelIniTc(lstm_return_sequence, "1:1:2:1", 10, ModelTestOption::ALL),
      mkModelIniTc(lstm_return_sequence_with_batch, "2:1:2:1", 10, ModelTestOption::ALL),
      mkModelIniTc(multi_lstm_return_sequence, "1:1:1:1", 10, ModelTestOption::ALL),
      mkModelIniTc(multi_lstm_return_sequence_with_batch, "2:1:1:1", 10, ModelTestOption::ALL),
      mkModelIniTc(rnn_basic, "1:1:1:1", 10, ModelTestOption::ALL),
      mkModelIniTc(rnn_return_sequences, "1:1:2:1", 10, ModelTestOption::ALL),
      mkModelIniTc(rnn_return_sequence_with_batch, "2:1:2:1", 10, ModelTestOption::ALL),
      mkModelIniTc(multi_rnn_return_sequence, "1:1:1:1", 10, ModelTestOption::ALL),
      mkModelIniTc(multi_rnn_return_sequence_with_batch, "2:1:1:1", 10, ModelTestOption::ALL),
      mkModelIniTc(gru_basic, "1:1:1:1", 10, ModelTestOption::ALL),
      mkModelIniTc(gru_return_sequence, "1:1:2:1", 10, ModelTestOption::ALL),
      mkModelIniTc(gru_return_sequence_with_batch, "2:1:2:1", 10, ModelTestOption::ALL),
      mkModelIniTc(multi_gru_return_sequence, "1:1:1:1", 10, ModelTestOption::ALL),
      mkModelIniTc(multi_gru_return_sequence_with_batch, "2:1:1:1", 10, ModelTestOption::ALL),

      /**< multi output test */
      mkModelIniTc(multiple_output_model, "3:1:1:10", 10, ModelTestOption::COMPARE) // Todo: Enable option to ALL
      /** resnet model */
      // this must match training (verify only forwarding output values) for 2 iterations with tolerance 1.2e-4
      // mkResNet18Tc(2, ModelTestOption::COMPARE)
    }
/** name each generated test case after the second element of its param tuple */
), [](const testing::TestParamInfo<nntrainerModelTest::ParamType>& info){
 return std::get<1>(info.param);
});
941 // clang-format on
942
943 /**
944  * @brief Read or save the model before initialize
945  */
946 TEST(nntrainerModels, read_save_01_n) {
947   nntrainer::NeuralNetwork NN;
948   std::shared_ptr<nntrainer::LayerNode> layer_node =
949     nntrainer::createLayerNode(nntrainer::InputLayer::type,
950                                {"input_shape=1:1:62720", "normalization=true"});
951
952   EXPECT_NO_THROW(NN.addLayer(layer_node));
953   EXPECT_NO_THROW(NN.setProperty({"loss=mse"}));
954
955   EXPECT_THROW(NN.load("model.bin"), std::runtime_error);
956   EXPECT_THROW(NN.save("model.bin"), std::runtime_error);
957
958   EXPECT_EQ(NN.compile(), ML_ERROR_NONE);
959
960   EXPECT_THROW(NN.load("model.bin"), std::runtime_error);
961   EXPECT_THROW(NN.save("model.bin"), std::runtime_error);
962 }
963
964 TEST(nntrainerModels, loadFromLayersBackbone_p) {
965   std::vector<std::shared_ptr<ml::train::Layer>> reference;
966   reference.emplace_back(
967     ml::train::layer::FullyConnected({"name=fc1", "input_shape=3:1:2"}));
968   reference.emplace_back(
969     ml::train::layer::FullyConnected({"name=fc2", "input_layers=fc1"}));
970
971   nntrainer::NeuralNetwork nn;
972   nn.addWithReferenceLayers(reference, "backbone", {}, {"fc1"}, {"fc2"},
973                             ml::train::ReferenceLayersType::BACKBONE, {});
974
975   nn.compile();
976   auto graph = nn.getFlatGraph();
977   for (unsigned int i = 0; i < graph.size(); ++i) {
978     EXPECT_EQ(graph.at(i)->getName(), "backbone/" + reference.at(i)->getName());
979   };
980 }
981
982 TEST(nntrainerModels, loadFromLayersRecurrent_p) {
983   std::vector<std::shared_ptr<ml::train::Layer>> reference;
984   reference.emplace_back(ml::train::layer::FullyConnected({"name=fc1"}));
985   reference.emplace_back(
986     ml::train::layer::FullyConnected({"name=fc2", "input_layers=fc1"}));
987
988   nntrainer::NeuralNetwork nn;
989   nn.addWithReferenceLayers(reference, "recurrent", {"out_source"}, {"fc1"},
990                             {"fc2"}, ml::train::ReferenceLayersType::RECURRENT,
991                             {
992                               "unroll_for=3",
993                               "return_sequences=true",
994                               "recurrent_input=fc1",
995                               "recurrent_output=fc2",
996                             });
997
998   std::vector<std::string> expected_node_names = {
999     "recurrent/fc1/0", "recurrent/fc2/0", "recurrent/fc1/1", "recurrent/fc2/1",
1000     "recurrent/fc1/2", "recurrent/fc2/2", "recurrent/fc2"};
1001   std::vector<std::string> expected_input_layers = {
1002     "out_source" /**< input added with external_input */,
1003     "recurrent/fc1/0",
1004     "recurrent/fc2/0",
1005     "recurrent/fc1/1",
1006     "recurrent/fc2/1",
1007     "recurrent/fc1/2",
1008     "recurrent/fc2/0" /**< out source's first input */,
1009   };
1010
1011   auto graph = nn.getFlatGraph();
1012   for (unsigned int i = 0; i < graph.size(); ++i) {
1013     EXPECT_EQ(graph.at(i)->getName(), expected_node_names.at(i)) << "at " << i;
1014     EXPECT_EQ(graph.at(i)->getInputConnectionName(0),
1015               expected_input_layers.at(i))
1016       << "at " << i;
1017   };
1018 }
1019
1020 /**
1021  * @brief Main gtest
1022  */
1023 int main(int argc, char **argv) {
1024   int result = -1;
1025
1026   try {
1027     testing::InitGoogleTest(&argc, argv);
1028   } catch (...) {
1029     std::cerr << "Error duing IniGoogleTest" << std::endl;
1030     return 0;
1031   }
1032
1033   try {
1034     result = RUN_ALL_TESTS();
1035   } catch (...) {
1036     std::cerr << "Error duing RUN_ALL_TESTS()" << std::endl;
1037   }
1038
1039   return result;
1040 }