From 9dd8c26e1bc37a8726737aa29f474b546ada2f28 Mon Sep 17 00:00:00 2001
From: Jihoon Lee
Date: Tue, 19 Oct 2021 20:19:01 +0900
Subject: [PATCH] [Test] Add recurrent value compare tests

This patch adds golden tests that compare recurrent values.
There are 4 cases presented:

1. single fc recurrent
2. stacked fc recurrent
3. single lstm recurrent
4. stacked lstm recurrent

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee
---
 .gitignore                                          |   1 +
 nntrainer/layers/lstm.cpp                           |   4 +-
 packaging/unittest_models_v2.tar.gz                 | Bin 0 -> 1823 bytes
 test/input_gen/genModelsRecurrent_v2.py             | 102 +++++++++++++++++++
 test/input_gen/recorder_v2.py                       |  10 +-
 test/unittest/meson.build                           |   1 +
 test/unittest/models/unittest_models_recurrent.cpp  | 108 ++++++++++++++------
 7 files changed, 198 insertions(+), 28 deletions(-)
 create mode 100644 packaging/unittest_models_v2.tar.gz
 create mode 100644 test/input_gen/genModelsRecurrent_v2.py

diff --git a/.gitignore b/.gitignore
index c4d8534..5678f6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,6 +43,7 @@ Applications/**/*.bin
 *.a
 *.o.d
 *.nnlayergolden
+*.nnmodelgolden
 
 # log files
 *.log

diff --git a/nntrainer/layers/lstm.cpp b/nntrainer/layers/lstm.cpp
index 9248c3a..1d964f3 100644
--- a/nntrainer/layers/lstm.cpp
+++ b/nntrainer/layers/lstm.cpp
@@ -395,6 +395,7 @@ void LSTMLayer::calcGradient(RunLayerContext &context) {
     Tensor rdata =
       incoming_deriv.getSharedDataTensor({d.width()}, b * d.width());
     /// @note this is not copying from start ~ end but only start time step
+    // This copy handles self-rolling as well as the last recurrent unroll.
     if ((unsigned)start_timestep + 1 == max_timestep) {
       data.fill(rdata);
     } else {
@@ -471,7 +472,8 @@ void LSTMLayer::calcGradient(RunLayerContext &context) {
       acti_func.run_prime_fn(cs, dc, dh);
       dc.multiply_i(ho);
     } else {
-      /// @todo optimize this by updating run_prime_fn to accumulate
+      /// @todo optimize this by updating run_prime_fn to accumulate or make
+      /// it in-place somehow
       Tensor dc_temp(dc.getDim());
       acti_func.run_prime_fn(cs, dc_temp, dh);
       dc_temp.multiply_i(ho);
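The temporary above exists because the derivative buffer is shared across
unrolled steps: the first (last-timestep) write may overwrite dc, but every
earlier step must add into it. A minimal standalone sketch of the pattern,
with a tanh derivative standing in for run_prime_fn (an assumption for
illustration, not the layer's actual activation plumbing):

    import torch

    def run_prime(cs, dh):
        # stand-in for acti_func.run_prime_fn; tanh is assumed for illustration
        return (1.0 - torch.tanh(cs) ** 2) * dh

    cs, dh, ho = torch.randn(3, 2), torch.randn(3, 2), torch.randn(3, 2)

    dc = run_prime(cs, dh) * ho       # last unrolled step: plain write into dc
    dc_temp = run_prime(cs, dh) * ho  # earlier steps: compute into a temporary,
    dc += dc_temp                     # then accumulate into the shared buffer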
diff --git a/packaging/unittest_models_v2.tar.gz b/packaging/unittest_models_v2.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d689aebad70ed011c651aec64f58b4251470d20b
GIT binary patch
literal 1823
[1823 bytes of base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/test/input_gen/genModelsRecurrent_v2.py b/test/input_gen/genModelsRecurrent_v2.py
new file mode 100644
index 0000000..6f968da
--- /dev/null
+++ b/test/input_gen/genModelsRecurrent_v2.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+##
+# Copyright (C) 2021 Jihoon Lee
+#
+# @file genModelsRecurrent_v2.py
+# @date 19 October 2021
+# @brief Generate recurrent model test cases
+# @author Jihoon Lee
+
+from recorder_v2 import record_v2, inspect_file
+import torch
+
+
+class FCUnroll(torch.nn.Module):
+    def __init__(self, unroll_for=1, num_fc=1):
+        super().__init__()
+        self.fcs = torch.nn.ModuleList([torch.nn.Linear(1, 1) for i in range(num_fc)])
+        self.unroll_for = unroll_for
+        # self.loss = torch.nn.MSELoss()
+        self.loss = torch.nn.Identity()
+
+    def forward(self, inputs, labels):
+        output = inputs[0]
+        for i in range(self.unroll_for):
+            for fc in self.fcs:
+                output = fc(output)
+        loss = self.loss(output)
+        # loss = self.loss(output, labels[0])
+        return output, loss
+
+
+class LSTMStacked(torch.nn.Module):
+    def __init__(self, unroll_for=2, num_lstm=1):
+        super().__init__()
+        self.input_size = self.hidden_size = 2
+        self.lstms = torch.nn.ModuleList(
+            [
+                torch.nn.LSTMCell(self.input_size, self.hidden_size, bias=True)
+                for _ in range(num_lstm)
+            ]
+        )
+        # self.lstm.weight_hh.data.fill_(1.0)
+        # self.lstm.weight_ih.data.fill_(1.0)
+        # self.lstm.bias_hh.data.fill_(1.0)
+        self.unroll_for = unroll_for
+        self.loss = torch.nn.MSELoss()
+
+    def forward(self, inputs, labels):
+        # the second bias (bias_ih) is zeroed on every forward pass so that its
+        # grad is always zero; this is because we are only keeping one bias
+        for lstm in self.lstms:
+            lstm.bias_ih.data.fill_(0.0)
+
+        hs = [torch.zeros_like(inputs[0]) for _ in self.lstms]
+        cs = [torch.zeros_like(inputs[0]) for _ in self.lstms]
+        out = inputs[0]
+        ret = []
+        for _ in range(self.unroll_for):
+            for i, (lstm, h, c) in enumerate(zip(self.lstms, hs, cs)):
+                hs[i], cs[i] = lstm(out, (h, c))
+                out = hs[i]
+            ret.append(out)
+
+        ret = torch.stack(ret, dim=1)
+        loss = self.loss(ret, labels[0])
+        return ret, loss
+
+
+if __name__ == "__main__":
+    record_v2(
+        FCUnroll(unroll_for=5),
+        iteration=2,
+        input_dims=[(1,)],
+        label_dims=[(1,)],
+        name="fc_unroll_single",
+    )
+
+    record_v2(
+        FCUnroll(unroll_for=2, num_fc=2),
+        iteration=2,
+        input_dims=[(1,)],
+        label_dims=[(1,)],
+        name="fc_unroll_stacked",
+    )
+
+    record_v2(
+        LSTMStacked(unroll_for=2, num_lstm=1),
+        iteration=2,
+        input_dims=[(3, 2)],
+        label_dims=[(3, 2, 2)],
+        name="lstm_single",
+    )
+
+    record_v2(
+        LSTMStacked(unroll_for=2, num_lstm=2),
+        iteration=2,
+        input_dims=[(3, 2)],
+        label_dims=[(3, 2, 2)],
+        name="lstm_stacked",
+    )
+
+    # inspect_file("lstm_single.nnmodelgolden")
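A quick standalone check (PyTorch only, no nntrainer involved) of why zeroing
bias_ih loses nothing: LSTMCell adds bias_ih and bias_hh into the same gate
pre-activation, so the two can always be folded into a single bias:

    import torch

    torch.manual_seed(0)
    a = torch.nn.LSTMCell(2, 2)  # two biases, as PyTorch trains them
    b = torch.nn.LSTMCell(2, 2)  # emulates a single-bias implementation

    b.weight_ih.data.copy_(a.weight_ih.data)
    b.weight_hh.data.copy_(a.weight_hh.data)
    b.bias_ih.data.zero_()                                 # the "always zero grad" bias
    b.bias_hh.data.copy_(a.bias_ih.data + a.bias_hh.data)  # folded single bias

    x = torch.randn(3, 2)  # batch 3, feature 2, as in the lstm_single case
    state = (torch.zeros(3, 2), torch.zeros(3, 2))
    ha, ca = a(x, state)
    hb, cb = b(x, state)
    assert torch.allclose(ha, hb, atol=1e-6)
    assert torch.allclose(ca, cb, atol=1e-6)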
diff --git a/test/input_gen/recorder_v2.py b/test/input_gen/recorder_v2.py
index 44f9daa..dfdc383 100644
--- a/test/input_gen/recorder_v2.py
+++ b/test/input_gen/recorder_v2.py
@@ -10,11 +10,16 @@
 import os
 import random
 
-import torch
+import torch  # torch used here is torch==1.9.1
 import numpy as np
 
 from transLayer_v2 import params_translated
 
+if torch.__version__ != "1.9.1":
+    print(
+        "this script was tested against torch 1.9.1; it might not work with a different torch version"
+    )
+
 SEED = 1234
 random.seed(SEED)
 np.random.seed(SEED)
@@ -44,6 +49,7 @@ def _rand_like(*shapes, scale=1, rand="int"):
     np_array = map(shape_to_np, shapes)
     return [torch.tensor(t * scale) for t in np_array]
 
+
 ##
 # @brief record a torch model
 # @param iteration number of iteration to record
@@ -64,8 +70,6 @@ def record_v2(model, iteration, input_dims, label_dims, name):
 
     optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
 
-    print(*(model.named_parameters()))
-
     def record_iteration(write_fn):
         inputs = _rand_like(*input_dims, rand="float")
         labels = _rand_like(*label_dims, rand="float")

diff --git a/test/unittest/meson.build b/test/unittest/meson.build
index c8781e1..3642286 100644
--- a/test/unittest/meson.build
+++ b/test/unittest/meson.build
@@ -11,6 +11,7 @@ unzip_target = [
   ['unittest_layers.tar.gz', 'unittest_layers'],
   ['unittest_layers_v2.tar.gz', 'unittest_layers'],
   ['unittest_models.tar.gz', 'unittest_models'],
+  ['unittest_models_v2.tar.gz', 'unittest_models'],
 ]
 
 src_path = meson.source_root() / 'packaging'
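For reference, the tarball added under packaging/ carries the .nnmodelgolden
files that meson unpacks for these tests. A hypothetical regeneration flow is
sketched below; the packaging step and the assumption that the goldens land in
the working directory are mine, the patch only ships the pre-built archive:

    import subprocess
    import tarfile

    # re-run the generator so the recorded torch models rewrite the goldens
    subprocess.run(["python3", "test/input_gen/genModelsRecurrent_v2.py"], check=True)

    # bundle them the way test/unittest/meson.build expects to unzip them
    names = ["fc_unroll_single", "fc_unroll_stacked", "lstm_single", "lstm_stacked"]
    with tarfile.open("packaging/unittest_models_v2.tar.gz", "w:gz") as tar:
        for name in names:
            tar.add(name + ".nnmodelgolden")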
"bias_initializer=ones | shared_from = fc_1", - IniSection("fc_3") + fc_base + - "unit=1 | weight_initializer=ones | " - "bias_initializer=ones | shared_from = fc_1", - constant_loss, - }); - -std::unique_ptr makeSingleLSTM() { +IniWrapper fc_unroll_single( + "fc_unroll_single", + { + nn_base, + sgd_base + "learning_rate=0.1", + IniSection("fc_1") + fc_base + "unit=1 | input_shape=1:1:1", + IniSection("fc_2") + fc_base + "unit=1 | shared_from = fc_1", + IniSection("fc_3") + fc_base + "unit=1 | shared_from = fc_1", + IniSection("fc_4") + fc_base + "unit=1 | shared_from = fc_1", + IniSection("fc_5") + fc_base + "unit=1 | shared_from = fc_1", + constant_loss, + }); + +std::unique_ptr makeFC() { std::unique_ptr nn(new NeuralNetwork()); nn->setProperty({"batch_size=1"}); auto outer_graph = makeGraph({ {"input", {"name=input", "input_shape=1:1:1"}}, /// here lstm_cells is being inserted + {"constant_derivative", {"name=loss", "input_layers=recurrent/a2"}}, + }); + for (auto &node : outer_graph) { + nn->addLayer(node); + } + + auto fcfc = makeGraph({ + {"Fully_connected", {"name=a1", "unit=1"}}, + {"Fully_connected", {"name=a2", "unit=1", "input_layers=a1"}}, + }); + + nn->addWithReferenceLayers(fcfc, "recurrent", {"input"}, {"a1"}, {"a2"}, + ml::train::ReferenceLayersType::RECURRENT, + { + "unroll_for=2", + "return_sequences=false", + "recurrent_input=a1", + "recurrent_output=a2", + }); + + nn->setOptimizer(ml::train::createOptimizer("sgd", {"learning_rate = 0.1"})); + return nn; +} + +static std::unique_ptr makeSingleLSTM() { + std::unique_ptr nn(new NeuralNetwork()); + nn->setProperty({"batch_size=3"}); + + auto outer_graph = makeGraph({ + {"input", {"name=input", "input_shape=1:1:2"}}, + /// here lstm_cells is being inserted {"mse", {"name=loss", "input_layers=lstm_scope/a1"}}, }); for (auto &node : outer_graph) { @@ -60,7 +89,7 @@ std::unique_ptr makeSingleLSTM() { } auto lstm = makeGraph({ - {"lstm", {"name=a1", "input_shape=1:1:1", "unit=1"}}, + {"lstm", {"name=a1", "unit=2"}}, }); nn->addWithReferenceLayers(lstm, "lstm_scope", {"input"}, {"a1"}, {"a1"}, @@ -76,14 +105,45 @@ std::unique_ptr makeSingleLSTM() { return nn; } +static std::unique_ptr makeStackedLSTM() { + std::unique_ptr nn(new NeuralNetwork()); + nn->setProperty({"batch_size=3"}); + + auto outer_graph = makeGraph({ + {"input", {"name=input", "input_shape=1:1:2"}}, + /// here lstm_cells is being inserted + {"mse", {"name=loss", "input_layers=lstm_scope/a2"}}, + }); + for (auto &node : outer_graph) { + nn->addLayer(node); + } + + auto lstm = makeGraph({ + {"lstm", {"name=a1", "unit=2"}}, + {"lstm", {"name=a2", "unit=2", "input_layers=a1"}}, + }); + + nn->addWithReferenceLayers(lstm, "lstm_scope", {"input"}, {"a1"}, {"a2"}, + ml::train::ReferenceLayersType::RECURRENT, + { + "unroll_for=2", + "return_sequences=true", + "recurrent_input=a1", + "recurrent_output=a2", + }); + + nn->setOptimizer(ml::train::createOptimizer("sgd", {"learning_rate = 0.1"})); + return nn; +} + INSTANTIATE_TEST_CASE_P( recurrentModels, nntrainerModelTest, ::testing::ValuesIn({ - mkModelIniTc(fc_only_hand_unrolled, "1:1:1", 1, - ModelTestOption::NO_THROW_RUN), - /// @todo make below COMPARE - mkModelTc(makeSingleLSTM, "lstm_return_sequence", "1:2:1", 1, - ModelTestOption::NO_THROW_RUN), + mkModelIniTc(fc_unroll_single, DIM_UNUSED, NOT_USED_, + ModelTestOption::COMPARE_V2), + mkModelTc_V2(makeFC, "fc_unroll_stacked", ModelTestOption::COMPARE_V2), + mkModelTc_V2(makeSingleLSTM, "lstm_single", ModelTestOption::COMPARE_V2), + mkModelTc_V2(makeStackedLSTM, 
"lstm_stacked", ModelTestOption::COMPARE_V2), }), [](const testing::TestParamInfo &info) { return std::get<1>(info.param); -- 2.7.4