/**
* @brief Gradients for the weights in this layer
+ * @note The order of gradients must match the order of the tensors in weights
*/
std::vector<std::reference_wrapper<Tensor>> gradients;
/**
* @brief weights in this layer
+ * @note Each weight is stored together with its corresponding bias.
+ * For example, with W0, W1, B0 and B1, weights is laid out as
+ * {W0, B0, W1, B1}.
*/
std::vector<std::reference_wrapper<Tensor>> weights;
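// Standalone sketch (illustration only) of the registration convention above:
// weights and gradients are pushed in the same interleaved {W0, B0, W1, B1}
// order, so index i in one vector always refers to the same parameter as
// index i in the other. The Tensor stand-in below is a placeholder, not the
// project's Tensor class.
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Tensor {
  std::string name;
};

int main() {
  // Parameters and gradients of a hypothetical layer with two kernels.
  Tensor W0{"W0"}, B0{"B0"}, W1{"W1"}, B1{"B1"};
  Tensor dW0{"dW0"}, dB0{"dB0"}, dW1{"dW1"}, dB1{"dB1"};

  std::vector<std::reference_wrapper<Tensor>> weights;
  std::vector<std::reference_wrapper<Tensor>> gradients;

  // Each weight is immediately followed by its bias: {W0, B0, W1, B1}.
  weights.push_back(W0);
  weights.push_back(B0);
  weights.push_back(W1);
  weights.push_back(B1);

  // Gradients follow the same order: {dW0, dB0, dW1, dB1}.
  gradients.push_back(dW0);
  gradients.push_back(dB0);
  gradients.push_back(dW1);
  gradients.push_back(dB1);

  // gradients[i] now always belongs to weights[i].
  for (std::size_t i = 0; i < weights.size(); ++i)
    std::cout << gradients[i].get().name << " updates "
              << weights[i].get().name << '\n';
}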
Kdim.height(kernel_size[0]);
Kdim.width(kernel_size[1]);
+ weights.clear();
for (unsigned int i = 0; i < filter_size; ++i) {
Tensor Knl = initializeWeight(Kdim, weight_ini_type, status);
NN_RETURN_STATUS();
delK.push_back(
  Tensor(input_dim.batch(), Kdim.channel(), Kdim.height(), Kdim.width()));
delBias.push_back(Tensor(input_dim.batch(), 1, 1, 1));
filters.push_back(Knl);
+ weights.push_back(Knl);
Tensor B(input_dim.batch(), 1, 1, 1);
if (!bias_init_zero) {
B.apply([&](float x) { return random(); });
}
bias.push_back(B);
+ weights.push_back(B);
}
// this output_dim should be the same as the dimension of hidden
output_dim.batch(input_dim.batch());
}
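// Standalone sketch: with the registration above, for filter_size kernels the
// weights vector ends up as {K0, B0, K1, B1, ...}, so kernel/bias pairs can be
// walked with a stride of 2. printPairs and the Tensor stand-in are
// hypothetical names for illustration, not part of the layer API.
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Tensor {
  std::string name;
};

void printPairs(const std::vector<std::reference_wrapper<Tensor>> &weights) {
  // Layout is {K0, B0, K1, B1, ...}: even index = kernel, odd index = bias.
  for (std::size_t i = 0; i + 1 < weights.size(); i += 2)
    std::cout << "kernel " << weights[i].get().name << " / bias "
              << weights[i + 1].get().name << '\n';
}

int main() {
  Tensor K0{"K0"}, B0{"B0"}, K1{"K1"}, B1{"B1"};
  std::vector<std::reference_wrapper<Tensor>> weights{K0, B0, K1, B1};
  printPairs(weights);
}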
gradients.clear();
- weights.clear();
// Update K / bias
for (unsigned int i = 0; i < filter_size; ++i) {
filters[i], weight_decay.lambda)
.run();
- weights.push_back(filters[i]);
- weights.push_back(bias[i]);
-
gradients.push_back(djdw);
gradients.push_back(delBias[i]);
}
} else {
bias.setRandUniform(-0.5, 0.5);
}
+
+ weights.clear();
+ weights.push_back(weight);
+ weights.push_back(bias);
+
return status;
}
gradients.push_back(djdw);
gradients.push_back(djdb);
- weights.clear();
- weights.push_back(weight);
- weights.push_back(bias);
-
opt.apply_gradients(weights, gradients, iteration);
return ret;
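// Standalone sketch of how the paired vectors can be consumed positionally.
// The plain-SGD update and the applyGradientsSGD name are assumptions for
// illustration; the project's opt.apply_gradients may implement a different
// update rule.
#include <cstddef>
#include <functional>
#include <vector>

struct Tensor {
  std::vector<float> data;
};

void applyGradientsSGD(std::vector<std::reference_wrapper<Tensor>> &weights,
                       std::vector<std::reference_wrapper<Tensor>> &gradients,
                       float learning_rate) {
  // Relies on the invariant documented above: gradients[i] belongs to
  // weights[i], so one positional loop updates every kernel and bias in place.
  for (std::size_t i = 0; i < weights.size(); ++i) {
    Tensor &w = weights[i].get();
    const Tensor &g = gradients[i].get();
    for (std::size_t j = 0; j < w.data.size(); ++j)
      w.data[j] -= learning_rate * g.data[j];
  }
}

int main() {
  Tensor w{{1.0f, 2.0f}}, g{{0.5f, 0.5f}};
  std::vector<std::reference_wrapper<Tensor>> weights{w};
  std::vector<std::reference_wrapper<Tensor>> gradients{g};
  applyGradientsSGD(weights, gradients, 0.1f); // w.data becomes {0.95, 1.95}
}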