// weight_ih ( input to hidden ) : [ 1, 1, feature_size, NUM_GATE * unit ]
// -> i, f, g, o
const TensorDim weight_ih_dim({feature_size, NUM_GATE * unit});
- wt_idx[LSTMParams::weight_ih] =
- context.requestWeight(weight_ih_dim, weight_initializer, weight_regularizer,
- weight_regularizer_constant, weight_decay, "weight_ih", true);
+ wt_idx[LSTMParams::weight_ih] = context.requestWeight(
+ weight_ih_dim, weight_initializer, weight_regularizer,
+ weight_regularizer_constant, weight_decay, "weight_ih", true);
// weight_hh ( hidden to hidden ) : [ 1, 1, unit, NUM_GATE * unit ] -> i,
// f, g, o
const TensorDim weight_hh_dim({unit, NUM_GATE * unit});
} else {
// bias_ih ( input bias ) : [ 1, 1, 1, NUM_GATE * unit ] -> i, f, g, o
const TensorDim bias_ih_dim({NUM_GATE * unit});
- wt_idx[LSTMParams::bias_ih] =
- context.requestWeight(bias_ih_dim, bias_initializer,
- WeightRegularizer::NONE, 1.0f, bias_decay, "bias_ih", true);
+ wt_idx[LSTMParams::bias_ih] = context.requestWeight(
+ bias_ih_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
+ bias_decay, "bias_ih", true);
// bias_hh ( hidden bias ) : [ 1, 1, 1, NUM_GATE * unit ] -> i, f, g, o
wt_idx[LSTMParams::bias_hh] = context.requestWeight(
- bias_hh_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
+ bias_ih_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
bias_decay, "bias_hh", true);
}
}
const TensorDim reverse_weight_ih_dim({feature_size, NUM_GATE * unit});
wt_idx[LSTMParams::reverse_weight_ih] = context.requestWeight(
reverse_weight_ih_dim, weight_initializer, weight_regularizer,
- weight_regularizer_constant, "reverse_weight_ih", true);
+ weight_regularizer_constant, weight_decay, "reverse_weight_ih", true);
// reverse_weight_hh ( hidden to hidden ) : [ 1, 1, unit, NUM_GATE *
// unit ]
// -> i, f, g, o
const TensorDim reverse_weight_hh_dim({unit, NUM_GATE * unit});
wt_idx[LSTMParams::reverse_weight_hh] = context.requestWeight(
reverse_weight_hh_dim, weight_initializer, weight_regularizer,
- weight_regularizer_constant, "reverse_weight_hh", true);
+ weight_regularizer_constant, weight_decay, "reverse_weight_hh", true);
if (!disable_bias) {
if (integrate_bias) {
// reverse_bias_h ( input bias and hidden bias integrated into one ) :
// [ 1, 1, 1, NUM_GATE * unit ] -> i, f, g, o
const TensorDim reverse_bias_h_dim({NUM_GATE * unit});
wt_idx[LSTMParams::reverse_bias_h] = context.requestWeight(
reverse_bias_h_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
- "reverse_bias_h", true);
+ bias_decay, "reverse_bias_h", true);
} else {
// reverse_bias_ih ( input bias ) : [ 1, 1, 1, NUM_GATE * unit ] ->
// i, f, g, o
const TensorDim reverse_bias_ih_dim({NUM_GATE * unit});
wt_idx[LSTMParams::reverse_bias_ih] = context.requestWeight(
reverse_bias_ih_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
- "reverse_bias_ih", true);
+ bias_decay, "reverse_bias_ih", true);
// reverse_bias_hh ( hidden bias ) : [ 1, 1, 1, NUM_GATE * unit ] ->
// i, f, g, o
const TensorDim reverse_bias_hh_dim({NUM_GATE * unit});
wt_idx[LSTMParams::reverse_bias_hh] = context.requestWeight(
reverse_bias_hh_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
- "reverse_bias_hh", true);
+ bias_decay, "reverse_bias_hh", true);
}
}
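
The decay arguments threaded through requestWeight above feed the optimizer's weight-decay term. As a point of reference, here is a minimal sketch of a coupled-L2 SGD step (illustrative Python; the function and variable names are assumptions, not nntrainer internals), which is also the rule PyTorch's SGD uses for its weight_decay option:

def sgd_step(w, grad, lr=0.1, decay=0.0):
    # coupled L2 decay: the decay term is folded into the gradient
    grad = grad + decay * w
    return w - lr * grad
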
        return (output, kappa), loss
+class FCRelu(torch.nn.Module):
+    def __init__(self, decay=False):
+        super().__init__()
+        self.fc = torch.nn.Linear(3, 10)
+        self.fc1 = torch.nn.Linear(10, 2)
+        self.loss = torch.nn.MSELoss()
+        self.decay = decay
+
+    def forward(self, inputs, labels):
+        out = torch.relu(self.fc(inputs[0]))
+        out = torch.sigmoid(self.fc1(out))
+        loss = self.loss(out, labels[0])
+        return out, loss
+
+    def getOptimizer(self):
+        if not self.decay:
+            return torch.optim.SGD(self.parameters(), lr=0.1)
+        else:
+            # decay only fc.weight and fc1.bias so that both the
+            # weight_decay and bias_decay paths are exercised
+            decay_params = []
+            non_decay_params = []
+            for name, params in self.named_parameters():
+                if name == 'fc.weight' or name == 'fc1.bias':
+                    decay_params.append(params)
+                else:
+                    non_decay_params.append(params)
+            return torch.optim.SGD([
+                {'params': non_decay_params},
+                {'params': decay_params, 'weight_decay': 0.9}], lr=0.1)
+
+
if __name__ == "__main__":
    record_v2(
        ReduceMeanLast(),
        name="mol_attention",
    )
-    # inspect_file("mol_attention_masked.nnmodelgolden")
+    fc_relu_decay = FCRelu(decay=True)
+    record_v2(
+        fc_relu_decay,
+        iteration=2,
+        input_dims=[(3, 3)],
+        input_dtype=[float],
+        label_dims=[(3, 2)],
+        name="fc_relu_decay",
+        optimizer=fc_relu_decay.getOptimizer(),
+    )
+
+    inspect_file("fc_relu_decay.nnmodelgolden")
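
Since the golden file is recorded with PyTorch's optimizer, a decayed parameter follows SGD's rule of adding weight_decay * param to the gradient before the step. A hypothetical standalone sanity check (not part of the test harness) for one such step:

import torch

# one decayed SGD step should give w - lr * (grad + decay * w)
w = torch.nn.Parameter(torch.ones(2))
opt = torch.optim.SGD([w], lr=0.1, weight_decay=0.9)
w.grad = torch.full((2,), 0.5)
w0 = w.detach().clone()
opt.step()
assert torch.allclose(w.detach(), w0 - 0.1 * (w.grad + 0.9 * w0))
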
static std::string red_mean_base = "type = reduce_mean";
static IniSection sgd_base("optimizer", "Type = sgd");
static IniSection constant_loss("loss", "type = constant_derivative");
+static IniSection act_base("activation", "Type = Activation");
IniWrapper reduce_mean_last("reduce_mean_last",
{
constant_loss,
});
+IniWrapper fc_relu_decay(
+ "fc_relu_decay",
+ {nn_base + "Loss=mse | batch_size = 3", sgd_base + "learning_rate = 0.1",
+ IniSection("input") + "type=input" + "input_shape = 1:1:3",
+ IniSection("dense") + fc_base + "unit = 10" + "weight_decay=0.9",
+ IniSection("act") + act_base + "Activation = relu",
+ IniSection("dense_1") + fc_base + "unit = 2" + "bias_decay=0.9",
+ IniSection("act_1") + act_base + "Activation = sigmoid"});
+
static std::unique_ptr<NeuralNetwork> makeMolAttention() {
std::unique_ptr<NeuralNetwork> nn(new NeuralNetwork());
nn->setProperty({"batch_size=3"});
ModelTestOption::COMPARE_V2),
mkModelTc_V2(makeMolAttentionMasked, "mol_attention_masked",
ModelTestOption::COMPARE_RUN_V2),
+ mkModelIniTc(fc_relu_decay, DIM_UNUSED, NOT_USED_,
+ ModelTestOption::COMPARE_V2),
}),
[](const testing::TestParamInfo<nntrainerModelTest::ParamType> &info) {
return std::get<1>(info.param);