}
}
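+ /** mark weights whose gradient is first accessed at their node's gradient calculation order */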
+ for (unsigned int idx = 0; idx < graph.size(); ++idx) {
+ auto const &lnode = getSortedLayerNode(idx);
+ auto &rc = lnode->getRunContext();
+ auto first_grad_access = std::get<1>(lnode->getExecutionOrder());
+ for (unsigned i = 0; i < rc.getNumWeights(); ++i) {
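+ // skip weights that do not have a gradient tensor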
+ if (!rc.weightHasGradient(i)) {
+ continue;
+ }
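+ // if this node is the first accessor of the gradient, flag the weight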
+ if (tensor_manager->isFirstAccess(rc.getWeightGrad(i).getName(),
+ first_grad_access)) {
+ rc.getWeightObject(i).setAsGradientFirstAccess();
+ }
+ }
+ }
/**** identify model input / output to be set externally later ****/
auto identify_as_model_input = [this](LayerNode *node) {
auto num_input = node->getNumInputs();
Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
- if (context.isWeightDependent(weight_idx[FCParams::bias])) {
+ if (context.isGradientFirstAccess(weight_idx[FCParams::bias])) {
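+ // first access of the gradient: overwrite djdb with the summed derivative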
+ derivative_.sum({0, 1, 2}, djdb);
+ } else {
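+ // gradient already holds a value from an earlier access: accumulate into djdb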
/// @todo optimize below by adding beta to Tensor::sum
Tensor t = derivative_.sum({0, 1, 2});
djdb.add_i(t);
- } else {
- derivative_.sum({0, 1, 2}, djdb);
}
- if (context.isWeightDependent(weight_idx[FCParams::weight])) {
- input_.dot(derivative_, djdw, true, false, 1.0f);
- } else {
+ if (context.isGradientFirstAccess(weight_idx[FCParams::weight])) {
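+ // first access of the gradient: overwrite djdw (beta = 0.0f)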
input_.dot(derivative_, djdw, true, false, 0.0f);
+ } else {
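+ // accumulate into the existing djdw (beta = 1.0f)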
+ input_.dot(derivative_, djdw, true, false, 1.0f);
}
}
return weights[idx]->isDependent();
}
+bool RunLayerContext::isGradientFirstAccess(unsigned int idx) const {
+ return weights[idx]->isGradientFirstAccess();
+}
+
/**
* @brief Get the tensor name
*
*/
bool isWeightDependent(unsigned int idx) const;
+ /**
+ * @brief check if the gradient of the weight at the given index is being
+ * accessed for the first time
+ *
+ * @param idx weight index
+ * @return bool true if it is the first access of the gradient
+ */
+ bool isGradientFirstAccess(unsigned int idx) const;
+
/**
* @brief Get the tensor name
*
explicit Var_Grad(const Tensor &v, const Tensor &g, const std::string &n = "",
bool is_dependent = false) :
is_dependent(is_dependent),
+ is_first_access_gradient(false),
var(
std::make_shared<Tensor>(v.getSharedDataTensor(v.getDim(), 0, false, n))),
grad(std::make_shared<Tensor>(n + grad_suffix)) {
*/
explicit Var_Grad(Tensor *v, Tensor *g, bool is_dependent = false) :
is_dependent(is_dependent),
+ is_first_access_gradient(false),
var(std::shared_ptr<Tensor>(v, [](void *) {})),
grad(std::shared_ptr<Tensor>(g, [](void *) {})) {
if (!v)
*/
bool isDependent() const { return is_dependent; }
+ /**
+ * @brief Set as the first access of the gradient
+ *
+ */
+ void setAsGradientFirstAccess() { is_first_access_gradient = true; }
+
+ /**
+ * @brief check if the gradient of this variable is being accessed for the
+ * first time in the execution order
+ *
+ * @return bool true if it is the first access of the gradient
+ */
+ bool isGradientFirstAccess() const { return is_first_access_gradient; }
+
inline static const std::string grad_suffix = ":grad";
protected:
bool is_dependent; /**< check if the weight tensor is borrowed from somewhere
thus it is dependent */
+ bool is_first_access_gradient; /**< check if the gradient of this variable
+ is being accessed for the first time */
std::shared_ptr<Tensor> var; /**< variable to be updated and used */
std::shared_ptr<Tensor> grad; /**< gradient for the variable */