# SPDX-License-Identifier: Apache-2.0
# Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
#
# @file genModelTests_v2.py
# @date 25 November 2021
# @brief Generate model tcs
# @author Parichay Kapoor <pk.kapoor@samsung.com>

import math
import torch

from recorder_v2 import record_v2, inspect_file, _rand_like

class ReduceMeanLast(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(2, 7)
        self.loss = torch.nn.Identity()

    def forward(self, inputs, labels):
        out = self.fc(inputs[0])
        out = torch.mean(out, dim=-1)
        loss = self.loss(torch.sum(out))
        return out, loss

class MolAttention(torch.nn.Module):
    def __init__(self, query_size):
        super(MolAttention, self).__init__()
        self.query_size = query_size
        self.units = 8
        self.K = 5  # number of mixtures
        self.dense1 = torch.nn.Linear(self.query_size, self.units)
        self.dense2 = torch.nn.Linear(self.units, 3 * self.K, bias=False)
        self.loss = torch.nn.Identity()

    def forward(self, inputs, labels):
        if len(inputs) == 4:
            query, values, attention_state, mask_len = inputs
        else:
            query, values, attention_state = inputs
            mask_len = None
        batch_size, timesteps, _ = values.size()

        dense1_out = torch.tanh(self.dense1(query.unsqueeze(1)))
        mlp_proj_out = self.dense2(dense1_out)
        kappa, beta, alpha = mlp_proj_out.chunk(chunks=3, dim=2)
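
        # kappa: window locations (made cumulative below via attention_state),
        # beta: window widths, alpha: mixture weights of the K logistic windows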
        kappa = torch.exp(kappa)
        beta = torch.exp(beta)
        alpha = torch.softmax(alpha, dim=2)
        kappa = kappa + attention_state

        # Timesteps const array
        j = torch.arange(start=1, end=timesteps + 1).view(1, -1, 1).expand(batch_size, -1, self.K)
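
        # score for timestep j is the probability mass each logistic mixture assigns
        # to [j - 0.5, j + 0.5), i.e. CDF(j + 0.5) - CDF(j - 0.5), summed over mixtures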
        integrals_left = torch.sigmoid(torch.div(j + 0.5 - kappa, beta + 1e-8))
        integrals_right = torch.sigmoid(torch.div(j - 0.5 - kappa, beta + 1e-8))
        integrals = alpha * (integrals_left - integrals_right)
        scores = torch.sum(integrals, dim=2)

        if mask_len is not None:
            max_len = max(int(mask_len.max()), scores.shape[1])
            mask = torch.arange(0, max_len)\
                    .type_as(mask_len)\
                    .unsqueeze(0).expand(mask_len.numel(), max_len)\
                    .lt(mask_len.unsqueeze(1))
            scores.masked_fill_(torch.logical_not(mask), 0.)
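
        # weighted sum over timesteps: (B, 1, T) @ (B, T, C) -> (B, 1, C), squeezed to (B, C)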
        output = torch.matmul(scores.unsqueeze(1), values).squeeze(dim=1)

        loss = self.loss(torch.sum(output)) + self.loss(torch.sum(kappa))

        return (output, kappa), loss

class MultiHeadAttention(torch.nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, need_weights=True, provide_attention_mask=False):
        super(MultiHeadAttention, self).__init__()
        self.multi_head_attention = torch.nn.MultiheadAttention(embed_dim, num_heads, dropout, bias, add_bias_kv, add_zero_attn, kdim, vdim, batch_first=True)
        self.loss = torch.nn.MSELoss()
        self.need_weights = need_weights
        self.provide_attention_mask = provide_attention_mask
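
    # batch_first=True keeps every tensor in (batch, sequence, embedding) layout,
    # matching the input_dims used by the test cases below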
    def forward(self, inputs, labels):
        inputs, attn_mask = (inputs[:-1], inputs[-1]) if self.provide_attention_mask else (inputs, None)
        query, *left = inputs
        if len(left) == 0:
            # a single input tensor means self-attention
            key = value = query
        else:
            key, value = left

        output, attention_weight = self.multi_head_attention(query, key, value, need_weights=self.need_weights, attn_mask=attn_mask)
        loss = self.loss(output, labels[0])
        if attention_weight is not None:
            output = [output, attention_weight]

        return output, loss

    def input_label_reader(input_dims, label_dims, input_dtype):
        query_dim, key_dim, value_dim, *left_dim = input_dims
        query_dtype, key_dtype, value_dtype, *left_dtype = input_dtype
        assert query_dtype == key_dtype == value_dtype
        if left_dim != []:
            mask_dim = left_dim[0]
            mask_dtype = left_dtype[0]
            if mask_dtype == bool:
                # Since nntrainer does not support bool type tensor yet, convert mask to float type
                # todo: return bool type mask tensor
                mask = torch.randn(mask_dim) > 0.5
                new_attn_mask = torch.zeros_like(mask, dtype=torch.float32)
                new_attn_mask.masked_fill_(mask, float("-inf"))
                mask = [new_attn_mask]
            elif mask_dtype == int:
                mask = [torch.randint(0, 1, mask_dim, dtype=torch.int32)]
            else:
                mask = _rand_like([mask_dim], -1e9, mask_dtype)
        else:
            mask = []
        inputs = _rand_like([query_dim, key_dim, value_dim], dtype=input_dtype if input_dtype is not None else float) + mask
        labels = _rand_like(label_dims, dtype=float)
        return inputs, labels

class PositionalEncoding(torch.nn.Module):
    def __init__(self, d_model: int, max_len):
        super(PositionalEncoding, self).__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
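        # interleave sin/cos: channel 2i gets sin(pos * w_i), channel 2i+1 gets cos(pos * w_i),
        # where w_i = 10000^(-2i / d_model)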
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)
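        # pe is registered as a buffer, not a parameter: it is saved with the module
        # and moves across devices with it, but receives no gradient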
        self.multi_head_attention = torch.nn.MultiheadAttention(d_model, 2, batch_first=True)
        self.loss = torch.nn.MSELoss()

    def forward(self, inputs, labels):
        output = inputs[0]
        output += self.pe[:, :output.size(1), :]
        output = self.multi_head_attention(output, output, output)
        loss = self.loss(output[0], labels[0])
        return output[0], loss

# class for testing the transformer encoder layer
class TransformerEncoderLayer(torch.nn.Module):
    def __init__(self, d_model, nhead, dim_feedforward, provide_attention_mask=False):
        super(TransformerEncoderLayer, self).__init__()
        self.encoder_layer = torch.nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout=0.0, batch_first=True)
        self.loss = torch.nn.MSELoss()
        # indicates whether an attention mask will be given
        self.provide_attention_mask = provide_attention_mask

    def forward(self, inputs, labels):
        inputs, attn_mask = (inputs[0], inputs[-1]) if self.provide_attention_mask else (inputs[0], None)
        output = self.encoder_layer(inputs, attn_mask)

        loss = self.loss(output, labels[0])

        return output, loss

    def input_label_reader(input_dims, label_dims, input_dtypes):
        input_dim, *left_dim = input_dims
        input_dtype, *left_dtype = input_dtypes
        if left_dim != []:
            mask_dim = left_dim[0]
            mask_dtype = left_dtype[0]
            if mask_dtype == bool:
                # Since nntrainer does not support bool type tensor yet, convert mask to float type
                # todo: return bool type mask tensor
                mask = torch.randn(mask_dim) > 0.5
                new_attn_mask = torch.zeros_like(mask, dtype=torch.float32)
                new_attn_mask.masked_fill_(mask, float("-inf"))
                mask = [new_attn_mask]
            elif mask_dtype == int:
                mask = [torch.randint(0, 1, mask_dim, dtype=torch.int32)]
            else:
                mask = _rand_like([mask_dim], -1e9, mask_dtype)
        else:
            mask = []
        inputs = _rand_like([input_dim], dtype=input_dtype if input_dtype is not None else float) + mask
        labels = _rand_like(label_dims, dtype=float)
        return inputs, labels

# class for testing the transformer decoder layer
class TransformerDecoderLayer(torch.nn.Module):
    def __init__(self, d_model, nhead, dim_feedforward, provide_attention_mask=False):
        super(TransformerDecoderLayer, self).__init__()
        self.decoder_layer = torch.nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout=0.0, batch_first=True)
        self.loss = torch.nn.MSELoss()
        # indicates whether attention masks will be given
        self.provide_attention_mask = provide_attention_mask

    def forward(self, inputs, labels):
        tgt, memory, tgt_mask, memory_mask = (inputs[0], inputs[1], inputs[-2], inputs[-1]) if self.provide_attention_mask else (inputs[0], inputs[1], None, None)
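        # tgt_mask masks the decoder self-attention; memory_mask masks the
        # cross-attention over the encoder output (memory)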
        output = self.decoder_layer(tgt, memory, tgt_mask, memory_mask)

        loss = self.loss(output, labels[0])

        return output, loss

    def input_label_reader(input_dims, label_dims, input_dtypes):
        tgt_dim, memory_dim, *mask_dims = input_dims
        tgt_dtype, memory_dtype, *mask_dtypes = input_dtypes
        if mask_dims != []:
            if mask_dtypes[0] == bool:
                # Since nntrainer does not support bool type tensor yet, convert mask to float type
                # todo: return bool type mask tensor
                masks = [torch.randn(dim) > 0.5 for dim in mask_dims]
                new_attn_masks = [torch.zeros_like(mask, dtype=torch.float32) for mask in masks]
                for mask, new_attn_mask in zip(masks, new_attn_masks):
                    new_attn_mask.masked_fill_(mask, float("-inf"))
                masks = new_attn_masks
            elif mask_dtypes[0] == int:
                masks = [torch.randint(0, 1, mask_dim, dtype=torch.int32) for mask_dim in mask_dims]
            else:
                masks = _rand_like(mask_dims, -1e9, mask_dtypes)
        else:
            masks = []
        inputs = _rand_like([tgt_dim, memory_dim], dtype=[tgt_dtype, memory_dtype] if tgt_dtype is not None and memory_dtype is not None else float) + masks
        labels = _rand_like(label_dims, dtype=float)
        return inputs, labels

# class for testing the transformer.
# The transformer in this class consists of a transformer encoder and a transformer decoder
class Transformer(torch.nn.Module):
    def __init__(self, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, provide_attention_mask=False):
        super(Transformer, self).__init__()
        self.transformer = torch.nn.Transformer(d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, dropout=0.0, batch_first=True)
        self.loss = torch.nn.MSELoss()
        # indicates whether attention masks will be given
        self.provide_attention_mask = provide_attention_mask

    def forward(self, inputs, labels):
        src, tgt, src_mask, tgt_mask, memory_mask = (inputs[0], inputs[1], inputs[-3], inputs[-2], inputs[-1]) if self.provide_attention_mask else (inputs[0], inputs[1], None, None, None)
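        # src_mask applies to encoder self-attention, tgt_mask to decoder
        # self-attention, and memory_mask to decoder cross-attention over the encoder output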
        output = self.transformer(src, tgt, src_mask, tgt_mask, memory_mask)

        loss = self.loss(output, labels[0])

        return output, loss

    def input_label_reader(input_dims, label_dims, input_dtypes):
        src_dim, tgt_dim, *mask_dims = input_dims
        src_dtype, tgt_dtype, *mask_dtypes = input_dtypes
        if mask_dims != []:
            if mask_dtypes[0] == bool:
                # Since nntrainer does not support bool type tensor yet, convert mask to float type
                # todo: return bool type mask tensor
                masks = [torch.randn(dim) > 0.5 for dim in mask_dims]
                new_attn_masks = [torch.zeros_like(mask, dtype=torch.float32) for mask in masks]
                for mask, new_attn_mask in zip(masks, new_attn_masks):
                    new_attn_mask.masked_fill_(mask, float("-inf"))
                masks = new_attn_masks
            elif mask_dtypes[0] == int:
                masks = [torch.randint(0, 1, mask_dim, dtype=torch.int32) for mask_dim in mask_dims]
            else:
                masks = _rand_like(mask_dims, -1e9, mask_dtypes)
        else:
            masks = []
        inputs = _rand_like([src_dim, tgt_dim], dtype=[src_dtype, tgt_dtype] if src_dtype is not None and tgt_dtype is not None else float) + masks
        labels = _rand_like(label_dims, dtype=float)
        return inputs, labels

class FCRelu(torch.nn.Module):
    def __init__(self, decay=False):
        super().__init__()
        self.fc = torch.nn.Linear(3, 10)
        self.fc1 = torch.nn.Linear(10, 2)
        self.loss = torch.nn.MSELoss()
        self.decay = decay

    def forward(self, inputs, labels):
        out = torch.relu(self.fc(inputs[0]))
        out = torch.sigmoid(self.fc1(out))
        loss = self.loss(out, labels[0])
        return out, loss

    def getOptimizer(self):
        if not self.decay:
            return torch.optim.SGD(self.parameters(), lr=0.1)
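
        # with decay enabled, split the parameters into two groups so that
        # weight_decay is applied only to the selected tensors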
        decay_params = []
        non_decay_params = []
        for name, params in self.named_parameters():
            if name == 'fc.weight' or name == 'fc1.bias':
                decay_params.append(params)
            else:
                non_decay_params.append(params)
        return torch.optim.SGD([
            {'params': non_decay_params},
            {'params': decay_params, 'weight_decay': 0.9}], lr=0.1)

# class for testing a non-trainable fc layer
class NonTrainableFC(torch.nn.Module):
    def __init__(self, idx):
        super().__init__()
        self.fc1 = torch.nn.Linear(3, 10)
        self.fc2 = torch.nn.Linear(10, 10)
        self.fc3 = torch.nn.Linear(10, 2)
        self.loss = torch.nn.MSELoss()
        # determine which layer to set to non-trainable
        if idx == 1:
            for param in self.fc1.parameters():
                param.requires_grad = False
        elif idx == 2:
            for param in self.fc2.parameters():
                param.requires_grad = False
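
    # the layer frozen above keeps requires_grad == False, so its weights should
    # remain unchanged across the recorded iterations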
    def forward(self, inputs, labels):
        out = torch.relu(self.fc1(inputs[0]))
        out = torch.relu(self.fc2(out))
        out = torch.sigmoid(self.fc3(out))
        loss = self.loss(out, labels[0])
        return out, loss

if __name__ == "__main__":
    record_v2(
        ReduceMeanLast(),
        iteration=2,
        input_dims=[(3, 2,)],
        label_dims=[(3, 1,)],
        name="reduce_mean_last",
    )

    record_v2(
        MolAttention(query_size=6),
        iteration=2,
        input_dims=[(3,6), (3,4,6), (3,1,5), (3,)],
        input_dtype=[float, float, float, int],
        label_dims=[(3,1,6), (3,1,5)],
        name="mol_attention_masked",
    )

    record_v2(
        MolAttention(query_size=6),
        iteration=2,
        input_dims=[(3,6), (3,4,6), (3,1,5)],
        input_dtype=[float, float, float],
        label_dims=[(3,1,6), (3,1,5)],
        name="mol_attention",
    )

    record_v2(
        MultiHeadAttention(embed_dim=6, num_heads=2, bias=False, need_weights=False),
        iteration=2,
        input_dims=[(3,3,6), (3,2,6), (3,2,6)],
        label_dims=[(3,3,6)],
        input_dtype=[float, float, float],
        name="multi_head_attention_disable_need_weights",
    )

    record_v2(
        MultiHeadAttention(embed_dim=6, num_heads=2),
        iteration=2,
        input_dims=[(3,3,6), (3,2,6), (3,2,6)],
        label_dims=[(3,3,6), (3,3,2)],
        input_dtype=[float, float, float],
        name="multi_head_attention",
    )

    record_v2(
        MultiHeadAttention(embed_dim=6, num_heads=2, kdim=4, vdim=5),
        iteration=2,
        input_dims=[(3,3,6), (3,2,4), (3,2,5)],
        label_dims=[(3,3,6), (3,3,2)],
        input_dtype=[float, float, float],
        name="multi_head_attention_kdim_vdim",
    )

    record_v2(
        MultiHeadAttention(embed_dim=6, num_heads=2, provide_attention_mask=True),
        iteration=2,
        input_dims=[(3,3,6), (3,2,6), (3,2,6), (6,3,2)],
        label_dims=[(3,3,6), (3,3,2)],
        input_dtype=[float, float, float, float],
        input_label_reader=MultiHeadAttention.input_label_reader,
        name="multi_head_attention_float_attn_mask",
    )

    # @todo: change this pseudo bool type tensor to actual bool tensor
    record_v2(
        MultiHeadAttention(embed_dim=6, num_heads=2, provide_attention_mask=True),
        iteration=2,
        input_dims=[(3,3,6), (3,2,6), (3,2,6), (6,3,2)],
        label_dims=[(3,3,6), (3,3,2)],
        input_dtype=[float, float, float, bool],
        input_label_reader=MultiHeadAttention.input_label_reader,
        name="multi_head_attention_pseudo_bool_attn_mask",
    )

    record_v2(
        MultiHeadAttention(embed_dim=6, num_heads=2),
        iteration=2,
        input_dims=[(3,3,6)],
        label_dims=[(3,3,6), (3,3,3)],
        input_dtype=[float],
        name="multi_head_attention_self_attention",
    )

    record_v2(
        PositionalEncoding(d_model=6, max_len=7),
        iteration=2,
        input_dims=[(3,5,6)],
        input_dtype=[float],
        label_dims=[(3,5,6)],
        name="positional_encoding",
    )

    record_v2(
        TransformerEncoderLayer(d_model=6, nhead=2, dim_feedforward=7),
        iteration=2,
        input_dims=[(3,5,6)],
        label_dims=[(3,5,6)],
        input_dtype=[float],
        name="transformer_encoder_layer",
    )

    record_v2(
        TransformerEncoderLayer(d_model=6, nhead=2, dim_feedforward=7, provide_attention_mask=True),
        iteration=2,
        input_dims=[(3,5,6), (6,5,5)],
        label_dims=[(3,5,6)],
        input_dtype=[float, float],
        input_label_reader=TransformerEncoderLayer.input_label_reader,
        name="transformer_encoder_layer_float_attn_mask",
    )

    record_v2(
        TransformerEncoderLayer(d_model=6, nhead=2, dim_feedforward=7, provide_attention_mask=True),
        iteration=2,
        input_dims=[(3,5,6), (6,5,5)],
        label_dims=[(3,5,6)],
        input_dtype=[float, bool],
        input_label_reader=TransformerEncoderLayer.input_label_reader,
        name="transformer_encoder_layer_pseudo_bool_attn_mask",
    )

    record_v2(
        TransformerDecoderLayer(d_model=6, nhead=2, dim_feedforward=7),
        iteration=2,
        input_dims=[(3,5,6), (3,4,6)],
        label_dims=[(3,5,6)],
        input_dtype=[float, float],
        name="transformer_decoder_layer",
    )

    record_v2(
        TransformerDecoderLayer(d_model=6, nhead=2, dim_feedforward=7, provide_attention_mask=True),
        iteration=2,
        input_dims=[(3,5,6), (3,4,6), (6,5,5), (6,5,4)],
        label_dims=[(3,5,6)],
        input_dtype=[float, float, float, float],
        input_label_reader=TransformerDecoderLayer.input_label_reader,
        name="transformer_decoder_layer_float_attn_mask",
    )

    record_v2(
        TransformerDecoderLayer(d_model=6, nhead=2, dim_feedforward=7, provide_attention_mask=True),
        iteration=2,
        input_dims=[(3,5,6), (3,4,6), (6,5,5), (6,5,4)],
        label_dims=[(3,5,6)],
        input_dtype=[float, float, bool, bool],
        input_label_reader=TransformerDecoderLayer.input_label_reader,
        name="transformer_decoder_layer_pseudo_bool_attn_mask",
    )

    record_v2(
        Transformer(d_model=6, nhead=2, num_encoder_layers=1, num_decoder_layers=1, dim_feedforward=7),
        iteration=2,
        input_dims=[(3,5,6), (3,4,6)],
        label_dims=[(3,4,6)],
        input_dtype=[float, float],
        name="transformer_single",
    )

    record_v2(
        Transformer(d_model=6, nhead=2, num_encoder_layers=2, num_decoder_layers=2, dim_feedforward=7),
        iteration=2,
        input_dims=[(3,5,6), (3,4,6)],
        label_dims=[(3,4,6)],
        input_dtype=[float, float],
        name="transformer_stack",
    )

    record_v2(
        Transformer(d_model=6, nhead=2, num_encoder_layers=2, num_decoder_layers=2, dim_feedforward=7, provide_attention_mask=True),
        iteration=2,
        input_dims=[(3,5,6), (3,4,6), (6,5,5), (6,4,4), (6,4,5)],
        label_dims=[(3,4,6)],
        input_dtype=[float, float, float, float, float],
        input_label_reader=Transformer.input_label_reader,
        name="transformer_float_attn_mask",
    )

    record_v2(
        Transformer(d_model=6, nhead=2, num_encoder_layers=2, num_decoder_layers=2, dim_feedforward=7, provide_attention_mask=True),
        iteration=2,
        input_dims=[(3,5,6), (3,4,6), (6,5,5), (6,4,4), (6,4,5)],
        label_dims=[(3,4,6)],
        input_dtype=[float, float, bool, bool, bool],
        input_label_reader=Transformer.input_label_reader,
        name="transformer_pseudo_bool_attn_mask",
    )

    fc_relu_decay = FCRelu(decay=True)
    record_v2(
        fc_relu_decay,
        iteration=2,
        input_dims=[(3, 3)],
        label_dims=[(3, 2)],
        name="fc_relu_decay",
        optimizer=fc_relu_decay.getOptimizer()
    )

    non_trainable_fc_idx1 = NonTrainableFC(idx=1)
    record_v2(
        non_trainable_fc_idx1,
        iteration=2,
        input_dims=[(3, 3)],
        label_dims=[(3, 2)],
        name="non_trainable_fc_idx1"
    )

    non_trainable_fc_idx2 = NonTrainableFC(idx=2)
    record_v2(
        non_trainable_fc_idx2,
        iteration=2,
        input_dims=[(3, 3)],
        label_dims=[(3, 2)],
        name="non_trainable_fc_idx2"
    )

    # Function to check the created golden test file
    inspect_file("fc_relu_decay.nnmodelgolden")