PROFILE_MEM_ANNOTATE("CalcGradient: " + node->getName());
bool apply_gradient = true;
+
/** If in gradient optimization mode, calculate the gradient first */
if (dynamic_training_opt.isGradientMode())
node->calcGradient();
/**
- * If optimization off, or gradient must be applied, then this will be true
+ * If optimization is off, or the gradient must be applied, then this will
+ * be true
 * @todo This apply_gradient should be passed to each weight and later
 * be queried when updating the gradient at once. (after moving
 * apply_gradient out of this function)
*/
// auto &layer = node->getObject();
// apply_gradient = dynamic_training_opt.checkIfApply(
- // layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0], opt,
- // iteration);
+ // layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0],
+ // opt, iteration);
/** If gradient must be applied and it's not gradient mode, calculate
* gradient
const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
node.getExecutionOrder();
- std::vector<unsigned int> var_exec_order(
- {forwarding_order, calcGradient_order, calcDerivative_order});
- std::vector<unsigned int> default_grad_exec_order(
- {calcGradient_order, calcDerivative_order});
+ std::vector<unsigned int> default_var_exec_order(
+ {forwarding_order, calcDerivative_order});
+ std::vector<unsigned int> default_grad_exec_order({calcDerivative_order});
TensorLifespan var_ls = TensorLifespan::MAX_LIFESPAN;
TensorLifespan grad_ls = TensorLifespan::BACKWARD_FUNC_LIFESPAN;
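/** Variables stay allocated for the whole execution (MAX_LIFESPAN); gradients
 * presumably only need to be valid while the backward functions run */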
for (unsigned int i = 0; i < weights_spec.size(); ++i) {
auto &[dim, t_initializer, w_reg, w_reg_const, decay, clip_by_global_norm,
need_gradient, name] = weights_spec.at(i);
+ auto var_exec_order = default_var_exec_order;
auto grad_exec_order = default_grad_exec_order;
+
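+ /** Only trainable weights take part in calcGradient, so the gradient
+  * calculation step is added to both exec orders only in that case */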
+ if (trainable) {
+ var_exec_order.insert(var_exec_order.begin(), calcGradient_order);
+ grad_exec_order.insert(grad_exec_order.begin(), calcGradient_order);
+ }
+
/**
 * If the weight is supposed to be clipped by global norm, extend its exec
* order with the max exec order where it will be used for clipping and then
 * @brief Create tensors with the given spec
*
*/
-std::vector<Var_Grad *>
-Manager::requestTensors(const GraphNode &node,
- const std::vector<Var_Grad::Spec> &tensors_spec,
- const std::vector<std::string> &shared_names) {
+std::vector<Var_Grad *> Manager::requestTensors(
+ const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
+ bool trainable, const std::vector<std::string> &shared_names) {
const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
node.getExecutionOrder();
var_exec_order.push_back(forwarding_order);
/** usage of the tensor gradient in backwarding */
- if (enum_class_logical_and(tspan, TensorLifespan::CALC_GRAD_LIFESPAN)) {
+ if (trainable &&
+ enum_class_logical_and(tspan, TensorLifespan::CALC_GRAD_LIFESPAN)) {
var_exec_order.push_back(calcGradient_order);
grad_exec_order.push_back(calcGradient_order);
}
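  /** Non-trainable tensors skip the calcGradient step above, so their gradient
   * does not need to stay valid while gradients are being calculated */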
dim, grad_exec_order, tspan,
Tensor::Initializer::ZEROS);
}
-
} else {
var = tensor_pool.request(name, dim, var_exec_order, tspan, t_init);
*
* @param node Graph node to extract node identifiers/info
* @param tensors_spec Specification for the tensors
+ * @param trainable make the weight trainable if true
* @param shared_names if tensor is shared, name is needed
*
* @return created tensors list
*/
- std::vector<Var_Grad *>
- requestTensors(const GraphNode &node,
- const std::vector<Var_Grad::Spec> &tensors_spec,
- const std::vector<std::string> &shared_names = {});
+ std::vector<Var_Grad *> requestTensors(
+ const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
+ bool trainable, const std::vector<std::string> &shared_names = {});
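  /**
   * Hypothetical call-site sketch (not part of this patch): the graph would now
   * forward the node's trainable flag when requesting layer tensors, e.g.
   *
   *   auto tensors = tensor_manager->requestTensors(
   *     *lnode, lnode->getTensorsSpec(), lnode->getTrainable(), shared_names);
   *
   * The names tensor_manager, getTensorsSpec() and getTrainable() are
   * assumptions used only for illustration.
   */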
/**
* @brief Create tensors with the given spec