#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
##
# Copyright (C) 2020 Jihoon Lee <jhoon.it.lee@samsung.com>
#
# @file recorder.py
# @date 13 October 2020
# @brief Generate test cases from a given Keras model
# @author Jihoon Lee <jhoon.it.lee@samsung.com>

from functools import wraps
import sys
import os
import warnings
import random
from collections import defaultdict

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    import numpy as np
    import tensorflow as tf
    import tensorflow.keras as K

from transLayer import attach_trans_layer, MultiOutLayer

__all__ = ["record", "record_single", "record_single_fp16"]

tf.compat.v1.enable_eager_execution()
# Fix the seeds across frameworks
SEED = 1234
random.seed(SEED)
tf.compat.v1.set_random_seed(SEED)
np.random.seed(SEED)

LOSS_FN = {
    "mse": lambda: tf.keras.losses.MeanSquaredError(),
    "cross_sigmoid": lambda: tf.keras.losses.BinaryCrossentropy(from_logits=True),
    "cross_softmax": lambda: tf.keras.losses.CategoricalCrossentropy(from_logits=True),
}


def _flatten(l: list):
    for el in l:
        if isinstance(el, list):
            yield from _flatten(el)
        else:
            yield el


def _get_loss_fn(loss_fn_representation):
    try:
        return LOSS_FN[loss_fn_representation]()
    except KeyError:
        raise ValueError("given loss fn representation is not available")


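# A minimal usage sketch for _get_loss_fn above (the shapes and values are
# made up purely for illustration):
#
#   loss_fn = _get_loss_fn("mse")  # -> tf.keras.losses.MeanSquaredError()
#   loss = loss_fn(tf.zeros((2, 3)), tf.ones((2, 3)))  # scalar tensor, 1.0
#
# An unknown key such as "hinge" raises ValueError rather than KeyError.
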
def _get_writer(file):
    def write_fn(*items):
        for item in items:
            try:
                item.numpy().tofile(file)
            except AttributeError:
                pass

        return items

    return write_fn


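# A sketch of what the writer emits (inferred from write_fn above): every
# tensor is appended to the file as its raw bytes in native byte order via
# numpy tofile(), with no header or shape metadata, so readers must already
# know the order, dtype and shape of every recorded tensor. "golden.bin"
# below is a hypothetical file name.
#
#   with open("golden.bin", "wb") as f:
#       write = _get_writer(f)
#       write(tf.ones((2, 2)), tf.zeros((3,)))  # appends 4 + 3 float32 values
#
# Items without a .numpy() method are silently skipped.
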
def _rand_like(tensorOrShape, scale=1, rand='int'):
    try:
        shape = tensorOrShape.shape
    except AttributeError:
        shape = tensorOrShape

    # for relu based models, range of 0 to x is better than -x to x
    if rand == 'int':
        t = np.random.randint(0, 10, shape).astype(dtype=np.float32)
    else:
        t = np.random.rand(*shape).astype(dtype=np.float32)
    return tf.convert_to_tensor(t) * scale


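# Example behaviour of _rand_like (a sketch; the shape is arbitrary):
#
#   t = _rand_like((2, 3))                       # float32 ints drawn from [0, 10)
#   u = _rand_like(t, scale=0.1, rand='float')   # uniform [0, 1) values scaled by 0.1
#
# Accepting either a tensor or a plain shape lets callers reuse an existing
# tensor's shape without unpacking it first.
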
##
# @brief access the keras layer hidden inside a tensor
# @note this relies on a non-API implementation detail and might break in the future
# @param tensor tensor to get the layer from
def _klayer(tensor):
    return tensor._keras_history.layer


_debug_default_formatter = lambda key, value: "\033[4;32mkey: {}\033[0m\n {}".format(
    key, value
)
##
# @brief Print debug information from the record
# @param debug list or string that filters debug information from @a data
# @param print_option print options passed to the print function
# @param print_format print formatter. a callable that takes key and value should be passed
# @param data keyword arguments holding the data to be printed
def _debug_print(
    debug=None,
    print_option={"end": "\n"},
    print_format=_debug_default_formatter,
    **data,
):
    if not debug:
        return
    elif isinstance(debug, str):
        debug = [debug]

    for target in debug:
        try:
            print(print_format(target, data[target]), **print_option)
        except KeyError:
            pass


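# Debugging sketch: every _debug_print call below forwards **kwargs, so a
# debug=... argument passed to record()/train_step() selects what is printed.
# The keys below are ones actually used in this file:
#
#   record(..., debug="loss")                    # print only the loss
#   record(..., debug=["output", "gradients"])   # print outputs and gradients
#
# Unknown keys are simply ignored thanks to the KeyError guard above.
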
##
# @brief generate random input and label data and save them to the file
# @note only one-hot labels are supported for now, this could be extended if needed.
def prepare_data(model, input_shape, label_shape, writer_fn, is_onehot, **kwargs):
    initial_input = _rand_like(input_shape) / 10
    if is_onehot:
        label = tf.one_hot(
            indices=np.random.randint(0, label_shape[1] - 1, label_shape[0]),
            depth=label_shape[1],
        )
    else:
        label = _rand_like(label_shape) / 10

    initial_weights = []
    for layer in iter_model(model):
        if "file_shape_generation" in kwargs.get("debug", []):
            get_shape = lambda x: [i.shape for i in x]
            print(layer.name)
            print("initial_weights", get_shape(layer.weights))
        initial_weights += layer.weights.copy()

    writer_fn(initial_input, label, *initial_weights)
    _debug_print(
        initial_input=initial_input,
        label=label,
        initial_weights=initial_weights,
        **kwargs,
    )

    return initial_input, label


##
# @brief iterate the model in the order of its outputs rather than its layers
# @note we might need a bit of reordering if there is more than one output; this assumes a 1-to-1 mapping between outputs and layers and that they are far apart
# @param model model to be iterated
# @yield layer
def iter_model(model):
    for out in model.outputs:
        yield _klayer(out)


##
# @brief model iteration wrapper that listens to the gradients and outputs of the model;
# each result is recorded.
def train_step(model, optimizer, loss_fn, initial_input, label, writer_fn, **kwargs):
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(initial_input)

        inp = initial_input
        outp = model.call(inp, training=True)
        outputs = {}
        inputs = {}
        for layer in model.layers:
            output_indices = model.recorder__output_map[layer.name]
            outputs[layer.name] = [outp[i] for i in output_indices]

            input_indices = model.recorder__input_map[layer.name]
            inputs[layer.name] = [outp[i] for i in input_indices]

        # loss = loss_fn(label, outp[-1])
        loss = []
        if kwargs.get("multi_out", None) is not None:
            multi_out = kwargs.get("multi_out", [])
        else:
            multi_out = [-1]
        for i in multi_out:
            loss.append(loss_fn(label, outp[i]))

    for layer in iter_model(model):

        if isinstance(layer, MultiOutLayer):
            continue

        layer_output = outputs[layer.name]
        layer_input = inputs[layer.name]

        # when there are multiple inputs, this will break.
        if not layer_input:
            layer_input = [initial_input]

        gradients = tape.gradient(loss, layer.trainable_weights)
        optimizer.apply_gradients(zip(gradients, layer.trainable_weights))

        if isinstance(optimizer, tf.keras.optimizers.Adam):
            wm = [optimizer.get_slot(var, "m") for var in layer.trainable_weights]
            wv = [optimizer.get_slot(var, "v") for var in layer.trainable_weights]
            _debug_print(wm=wm, wv=wv, **kwargs)

        _debug_print(lr=optimizer.lr, **kwargs)

        weights = layer.weights.copy()
        dx = tape.gradient(loss, list(_flatten(layer_input)))

        try:
            gradients = layer.to_nntr_trainable_weights(gradients)
        except AttributeError:
            pass

        writer_fn(
            *layer_output,  # output of forward
            *dx,  # output of backward
            *gradients,  # weight gradient output from backward
            *weights,  # updated weight after optimization
        )

        _debug_print(name=layer.name, print_format=value_only_formatter, **kwargs)

        if "file_shape_generation" in kwargs.get("debug", []):
            get_shape = lambda x: [i.shape for i in x]
            print(layer.name)
            print("output", get_shape(layer_output))
            print("dx", get_shape(dx))
            print("weights", get_shape(weights))
            print("gradients", get_shape(gradients))

        _debug_print(
            output=layer_output,
            dx=dx,
            weights=weights,
            gradients=gradients,
            dx_shape=[i.shape for i in dx],
            **kwargs,
        )

    for l in loss:
        writer_fn(l)

    _debug_print(loss=loss, **kwargs)


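# Golden record layout produced by one train_step() call (derived from the
# writer_fn calls above): for every non-MultiOut layer, in iter_model() order,
#
#   forward output(s), input derivative(s) dx, weight gradient(s), updated weight(s)
#
# followed by one loss value per recorded output. Readers of the golden file
# are assumed to consume it in exactly this order.
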
##
# @brief inference step of the record
def inference_step(loss_fn_str, initial_input, label, writer_fn):
    # Not yet implemented.
    # Because the loss function is used with from_logits=True, the last
    # activation layer should be added back for the inference step.
    if loss_fn_str == "cross_sigmoid" or loss_fn_str == "cross_entropy":
        # add last activation layer
        pass
    raise NotImplementedError("Not Implemented yet")


value_only_formatter = lambda key, value: value

##
# @brief generate a recordable model
# @note if @a model, @a inputs and @a outputs are given (rather than a list of layers), trans_layer will NOT be attached automatically
# @note in case of using a multiout layer, the output usage order must match
# @param loss_fn_str one of the LOSS_FN strings, otherwise raise KeyError
# @param model base model to record. if @a model is present, @a inputs and @a outputs are ignored
# @param inputs keras inputs to build a model
# @param outputs keras outputs to build a model
def generate_recordable_model(
    loss_fn_str, model=None, inputs=None, outputs=None, is_onehot=False, **kwargs
):
    if isinstance(model, list):
        model = [attach_trans_layer(layer) for layer in model]

        inputs = model[0]  # first layer must be input
        outputs = [inputs]
        for layer in model[1:]:
            current_output = layer(outputs[-1])
            outputs.append(current_output)

    if not isinstance(model, K.models.Model):
        # omit the last activation layer if cross_softmax or cross_sigmoid
        if loss_fn_str == "cross_softmax" or loss_fn_str == "cross_sigmoid":
            if isinstance(_klayer(outputs[-1]), K.layers.Activation):
                outputs = outputs[:-1]

        model = K.Model(inputs=inputs, outputs=outputs)

    inputs = model.inputs
    outputs = model.outputs

    model.summary(
        print_fn=lambda x: _debug_print(
            summary=x, print_format=value_only_formatter, **kwargs
        )
    )

    output_map = {}
    for idx, output in enumerate(model.outputs):
        layer_name = _klayer(output).name
        try:
            output_map[layer_name].append(idx)
        except KeyError:
            output_map[layer_name] = [idx]

    input_map = defaultdict(list)

    def _insert_input_map(key_layer):
        if isinstance(key_layer, K.layers.InputLayer):
            return

        input_node = key_layer.input

        if not isinstance(input_node, list):
            input_node = [input_node]

        for node in input_node:
            layer, _, tensor_idx = node._keras_history

            target_idx = output_map[layer.name][tensor_idx]
            input_list = input_map[key_layer.name]
            if target_idx not in input_list:
                input_list.append(target_idx)

    for idx, output in enumerate(outputs):
        target_layer = model.get_layer(_klayer(output).name)
        _insert_input_map(target_layer)

    for _, value in input_map.items():
        if not value:
            raise ValueError(f"input_map must contain value. {input_map}")

    _debug_print(input_map=input_map, output_map=output_map, **kwargs)

    # Additional properties recording the output and input mapping. output_map
    # maps a layer name to the indices of that layer's outputs inside
    # model.outputs, which is used to locate the calculated output; input_map
    # does the same for the layer's inputs.
    # e.g. for model(inputs=A, outputs=[A, B]), if B is the output of a layer
    # consuming A, input_map[_klayer(B).name] will contain 0 (the index of A)
    model.recorder__output_map = output_map
    model.recorder__input_map = input_map

    return model


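# Example of the recorder__* maps (a sketch; "input" and "dense" are
# hypothetical layer names): for model(inputs=A, outputs=[A, B]) where layer
# "dense" produces B from A,
#
#   model.recorder__output_map == {"input": [0], "dense": [1]}
#   model.recorder__input_map  == {"dense": [0]}   # "dense" reads model.outputs[0]
#
# i.e. output_map locates each layer's outputs inside model.outputs, and
# input_map records where each layer's inputs come from in that same list.
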
##
# @brief record function that records weights, gradients, inputs and outputs for @a iteration iterations
# @param loss_fn_str loss function representation
# @param optimizer keras optimizer
# @param file_name file name to save to
# @param input_shape input shape to use
# @param label_shape label shape to use
# @param iteration number of iterations to run
# @param model base model to record. if @a model is present, @a inputs and @a outputs are ignored
# @param inputs keras inputs to build a model
# @param outputs keras outputs to build a model
# @param debug a single string key or a list of keys selecting which information to print,
# check the _debug_print call sites to see what can be printed. for example `_debug_print(loss=loss, **kwargs)`
# catches debug="loss" or debug=["loss"] to print out the loss
def record(
    loss_fn_str,
    optimizer,
    file_name,
    input_shape,
    label_shape,
    iteration=1,
    model=None,
    inputs=None,
    outputs=None,
    is_onehot=True,
    **kwargs,
):
    if os.path.isfile(file_name):
        print("Warning: the file %s is being truncated and overwritten" % file_name)

    loss_fn = _get_loss_fn(loss_fn_str)
    model = generate_recordable_model(
        loss_fn_str, model, inputs, outputs, is_onehot, **kwargs
    )

    with open(file_name, "wb") as f:
        write = _get_writer(f)

        initial_input, label = prepare_data(
            model, input_shape, label_shape, write, is_onehot, **kwargs
        )
        for _ in range(iteration):
            _debug_print(
                iteration="\033[1;33m[%d/%d]\033[0m" % (_ + 1, iteration),
                print_format=value_only_formatter,
                **kwargs,
            )
            train_step(model, optimizer, loss_fn, initial_input, label, write, **kwargs)

        # self.inference_step(initial_input, label, write)


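# Usage sketch for record() above (the optimizer, shapes, file name and model
# below are made-up values for illustration, not an actual recorded test):
#
#   record(
#       loss_fn_str="mse",
#       optimizer=tf.keras.optimizers.SGD(learning_rate=0.1),
#       file_name="fc_example.info",
#       input_shape=(3, 4),
#       label_shape=(3, 2),
#       iteration=2,
#       model=[K.Input(shape=(4,)), K.layers.Dense(2)],
#       is_onehot=False,
#   )
#
# The resulting file holds the initial input, label and weights, followed by
# the per-layer records of each iteration (see train_step above).
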
##
# @brief record a single layer
def record_single(layer, input_shape, test_name, call_args={}, input_type='int'):
    layer = attach_trans_layer(layer)
    layer.build(input_shape)
    if isinstance(input_shape, list):
        inputs = [_rand_like(in_shape, 1, input_type) for in_shape in input_shape]
    else:
        inputs = _rand_like(input_shape, 1, input_type)

    initial_weights = [tf.Variable(i) for i in layer.weights]

    for _ in range(4):
        layer.call(inputs, **call_args)  # warm up the layer by calling it multiple times

    with tf.GradientTape(persistent=True) as tape:
        if isinstance(inputs, list):
            for inp in inputs:
                tape.watch(inp)
        else:
            tape.watch(inputs)
        outputs = layer.call(inputs, **call_args)
        dy_constant = outputs * 2  # set incoming derivative to 2 instead of 1

    weights = layer.weights.copy()
    gradients = tape.gradient(dy_constant, layer.trainable_weights)
    derivatives = tape.gradient(dy_constant, inputs)

    try:
        gradients = layer.to_nntr_trainable_weights(gradients)
    except AttributeError:
        pass

    with open(test_name + ".nnlayergolden", "wb") as f:
        writer = _get_writer(f)

        def write_tensor(tensors):
            if not isinstance(tensors, list):
                tensors = [tensors]
            for tensor in tensors:
                writer(tf.size(tensor), tensor)

        ## @todo inputs, outputs and derivatives can be more than one
        ## @note please update genLayerTests.py comments when updating below
        write_tensor(initial_weights)
        write_tensor(inputs)
        write_tensor(outputs)
        write_tensor(gradients)
        write_tensor(weights)
        write_tensor(derivatives)


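# Usage sketch for record_single() above (the layer, shape and file stem are
# illustrative; the real callers live in genLayerTests.py):
#
#   record_single(K.layers.Dense(5), (3, 10), "fc_example")
#
# writes "fc_example.nnlayergolden" containing, in order: initial weights,
# inputs, outputs, weight gradients, weights after the call and input
# derivatives, each tensor prefixed with its element count (tf.size).
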
##
# @brief record a single layer, casting the golden data to fp16
def record_single_fp16(layer, input_shape, test_name, call_args={}, input_type='int'):
    layer = attach_trans_layer(layer)
    layer.build(input_shape)
    if isinstance(input_shape, list):
        inputs = [_rand_like(in_shape, 1, input_type) for in_shape in input_shape]
    else:
        inputs = _rand_like(input_shape, 1, input_type)

    initial_weights = [tf.Variable(i) for i in layer.weights]

    for _ in range(4):
        layer.call(inputs, **call_args)  # warm up the layer by calling it multiple times

    with tf.GradientTape(persistent=True) as tape:
        if isinstance(inputs, list):
            for inp in inputs:
                tape.watch(inp)
        else:
            tape.watch(inputs)
        outputs = layer.call(inputs, **call_args)
        dy_constant = outputs * 2  # set incoming derivative to 2 instead of 1

    weights = layer.weights.copy()
    gradients = tape.gradient(dy_constant, layer.trainable_weights)
    derivatives = tape.gradient(dy_constant, inputs)

    try:
        gradients = layer.to_nntr_trainable_weights(gradients)
    except AttributeError:
        pass

    with open(test_name + ".nnlayergolden", "wb") as f:
        writer = _get_writer(f)

        def write_tensor_fp16(tensors):
            if not isinstance(tensors, list):
                tensors = [tensors]
            for tensor in tensors:
                tensor = tf.cast(tensor, tf.float16)
                writer(tf.size(tensor, out_type=tf.int16), tensor)

        ## @todo inputs, outputs and derivatives can be more than one
        ## @note please update genLayerTests.py comments when updating below
        write_tensor_fp16(initial_weights)
        write_tensor_fp16(inputs)
        write_tensor_fp16(outputs)
        write_tensor_fp16(gradients)
        write_tensor_fp16(weights)
        write_tensor_fp16(derivatives)