Layers = inputlayer \
fc1layer \
outputlayer # Layers of the neural network
-Learning_rate = 0.7 # Learning Rate
-Epoch = 300 # Epoch
+Learning_rate = 0.01 # Learning Rate
+Epoch = 100 # Epoch
Optimizer = sgd # Optimizer : sgd (stochastic gradient descent),
# adam (Adaptive Moment Estimation)
Activation = sigmoid # Activation : sigmoid, tanh
-Cost = msr # Cost(loss) function : msr (mean square root error)
+Cost = cross # Cost(loss) function : msr (mean squared error), cross (cross entropy),
# categorical ( categorical cross entropy, for logistic regression )
Model = "model.bin" # model path to save / read
minibatch = 1 # mini batch size
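For reference, with Optimizer = sgd the trainer applies one plain stochastic-gradient-descent step per mini batch; as a sketch in LaTeX (ignoring any learning-rate decay), the update the Learning_rate setting feeds into is

\[
W \leftarrow W - \eta \,\frac{\partial J}{\partial W}, \qquad \eta = \texttt{Learning\_rate} = 0.01 .
\]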
/**
* @brief Enumeration of cost(loss) function type
- * 0. MSR ( Mean Squared Roots )
- * 1. ENTROPY ( Categorical Cross Entropy )
- * 2. Unknown
+ * 0. CATEGORICAL ( Categorical Cross Entropy )
+ * 1. MSR ( Mean Squared Error )
+ * 2. ENTROPY ( Cross Entropy )
+ * 3. Unknown
*/
-typedef enum { COST_MSR, COST_ENTROPY, COST_UNKNOWN } cost_type;
+typedef enum { COST_CATEGORICAL, COST_MSR, COST_ENTROPY, COST_UNKNOWN } cost_type;
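For reference, the three losses the enum now distinguishes correspond to the following formulas, matching the computations in OutputLayer::backwarding below (Y the prediction, Y2 the label, n the output width, N the total number of output elements, \epsilon a small constant):

\[
\begin{aligned}
J_{\mathrm{MSR}} &= \tfrac{1}{2}\textstyle\sum_i (Y2_i - Y_i)^2 \\
J_{\mathrm{ENTROPY}} &= -\tfrac{1}{n}\textstyle\sum_i \bigl[\, Y2_i \log Y_i + (1 - Y2_i)\log(1 - Y_i) \,\bigr] \\
J_{\mathrm{CATEGORICAL}} &= -\tfrac{1}{N}\textstyle\sum_i \bigl[\, Y2_i \log (Y_i + \epsilon) + (1 - Y2_i)\log(1 - Y_i + \epsilon) \,\bigr], \quad Y = \mathrm{softmax}(h).
\end{aligned}
\]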
/**
 * @brief Enumeration of activation function type
 */
/**
 * @brief derivative sigmoid function
 * @param[in] x input
 */
-float sigmoidePrime(float x) { return (float)(1.0 / ((1 + exp(-x)) * (1.0 + 1.0 / (exp(-x) + 0.0000001)))); }
+float sigmoidePrime(float x) {
+ return (float)(1.0 / ((1 + exp(-x)) * (1.0 + 1.0 / (exp(-x) + 0.0000001))));
+}
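The expression above is the standard sigmoid derivative in disguise: since 1/(1 + 1/e^{-x}) = e^{-x}/(1 + e^{-x}) = 1 - \sigma(x), it is algebraically

\[
\sigma'(x) \;=\; \sigma(x)\bigl(1 - \sigma(x)\bigr) \;=\; \frac{1}{\bigl(1 + e^{-x}\bigr)\bigl(1 + 1/e^{-x}\bigr)},
\]

with the 1e-7 added to exp(-x) only guarding the inner division when e^{-x} underflows to zero for large x.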
/**
 * @brief tanh function for float type
 */
Tensor OutputLayer::forwarding(Tensor input) {
Input = input;
- if (cost == COST_ENTROPY)
- hidden = input.dot(Weight).applyFunction(activation);
- else
- hidden = input.dot(Weight).add(Bias).applyFunction(activation);
+ hidden = input.dot(Weight).add(Bias).applyFunction(activation);
return hidden;
}
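With the entropy special case removed, the forward pass is now the same for every cost type,

\[
h = f(xW + b),
\]

where f is the configured activation; for the categorical loss the softmax is no longer applied here but deferred to backwarding, where the prediction is taken as hidden.softmax().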
Tensor OutputLayer::backwarding(Tensor label, int iteration) {
float lossSum = 0.0;
Tensor Y2 = label;
- Tensor Y = hidden;
+ Tensor Y = hidden.softmax();
Tensor ret;
Tensor dJdB;
float ll = opt.learning_rate;
if (opt.decay_steps != -1) {
  ll = opt.learning_rate * pow(opt.decay_rate, (iteration / opt.decay_steps));
}
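// Staircase exponential decay: eta_t = eta_0 * r^floor(t/s), with
// eta_0 = opt.learning_rate, r = opt.decay_rate, s = opt.decay_steps;
// the floor comes from the integer division iteration / decay_steps.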
- if (cost == COST_ENTROPY) {
- dJdB = Y.subtract(Y2);
- Tensor temp = ((Y2.multiply(-1.0).transpose().dot(Y.add(opt.epsilon).applyFunction(log_float)))
- .subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
- Y.multiply(-1.0).add(1.0).add(opt.epsilon).applyFunction(log_float))));
- loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
- } else {
- Tensor sub = Y2.subtract(Y);
- Tensor l = (sub.multiply(sub)).sum().multiply(0.5);
- std::vector<float> t = l.Mat2Vec();
- for (int i = 0; i < l.getBatch(); i++) {
- lossSum += t[i];
- }
-
- loss = lossSum / (float)l.getBatch();
-
- dJdB = Y.subtract(Y2).multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
+ switch (cost) {
+ case COST_CATEGORICAL: {
+ dJdB = Y.subtract(Y2);
+ Tensor temp = ((Y2.multiply(-1.0).transpose().dot(Y.add(opt.epsilon).applyFunction(log_float)))
+ .subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
+ Y.multiply(-1.0).add(1.0).add(opt.epsilon).applyFunction(log_float))));
+ loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
+ } break;
+ case COST_MSR: {
+ Tensor sub = Y2.subtract(Y);
+ Tensor l = (sub.multiply(sub)).sum().multiply(0.5);
+ std::vector<float> t = l.Mat2Vec();
+ for (int i = 0; i < l.getBatch(); i++) {
+ lossSum += t[i];
+ }
+
+ loss = lossSum / (float)l.getBatch();
+
+ dJdB = Y.subtract(Y2).multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
+ } break;
+ case COST_ENTROPY: {
+ if (activation == sigmoid)
+ dJdB = Y.subtract(Y2).multiply(1.0 / Y.getWidth());
+ else
+ dJdB = (Y.subtract(Y2))
+ .multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime))
+ .divide(Y.multiply(Y.multiply(-1.0).add(1.0)))
+ .multiply(1.0 / Y.getWidth());
+
+ Tensor l = (Y2.multiply(Y.applyFunction(log_float))
+ .add((Y2.multiply(-1.0).add(1.0)).multiply((Y.multiply(-1.0).add(1.0)).applyFunction(log_float))))
+ .multiply(-1.0 / (Y2.getWidth()))
+ .sum();
+
+ std::vector<float> t = l.Mat2Vec();
+
+ for (int i = 0; i < l.getBatch(); i++) {
+ lossSum += t[i];
+ }
+ loss = lossSum / (float)l.getBatch();
+ } break;
+ case COST_UNKNOWN:
+ default:
+ break;
}
Tensor dJdW = Input.transpose().dot(dJdB);
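For reference, the dJdB expressions in the three cases follow from differentiating each loss with respect to the pre-activation z = xW + b (normalization constants as in the code):

\[
\begin{aligned}
\text{categorical (softmax output):}\quad &\frac{\partial J}{\partial z} = Y - Y2 \\
\text{msr:}\quad &\frac{\partial J}{\partial z} = (Y - Y2)\odot f'(z) \\
\text{entropy (sigmoid output):}\quad &\frac{\partial J}{\partial z} = \tfrac{1}{n}\,(Y - Y2)
\end{aligned}
\]

In the sigmoid case f'(z) = Y(1 - Y) exactly cancels the 1/(Y(1-Y)) factor from the loss derivative, which is why that branch skips the divide the general branch performs.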
tmp.data[i] = 1.0;
cblas_saxpy(this->len, value, tmp.data.data(), 1, result.data.data(), 1);
#else
- for (int k = 0; k < batch; ++k) {
+ for (int k = 0; k < len; ++k) {
result.data[k] = data[k] + value;
}
#endif
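The fix above matters because len counts every element across the whole batch while batch only counts samples, so the old fallback left most of the tensor untouched. A minimal standalone sketch of the corrected fallback, using std::vector as a stand-in for the Tensor's flat data buffer (add_scalar is an illustrative name, not part of the class):

#include <cstddef>
#include <vector>

std::vector<float> add_scalar(const std::vector<float> &data, float value) {
  std::vector<float> result(data.size());
  // iterate over every element (len), not just the first `batch` entries
  for (std::size_t k = 0; k < data.size(); ++k)
    result[k] = data[k] + value;
  return result;
}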
result.data[b + i + 2] = this->data[b + i + 2] / m.data[i + 2];
result.data[b + i + 3] = this->data[b + i + 3] / m.data[i + 3];
}
- for (int j = i - 1; j < width * height; ++j)
+ for (int j = i; j < width * height; ++j)
result.data[b + j] = this->data[b + j] / m.data[j];
}
} else {
result.data[i + 2] = this->data[i + 2] / m.data[i + 2];
result.data[i + 3] = this->data[i + 3] / m.data[i + 3];
}
- for (int j = i - 1; j < len; ++j)
+ for (int j = i; j < len; ++j)
result.data[j] = this->data[j] / m.data[j];
}
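The remainder loops above must start at i, the first index the 4-way unrolled body did not process: starting at i - 1 redundantly recomputes an element that was already written and, if the unrolled body never executes, indexes out of bounds at -1. A minimal standalone sketch of the pattern with the corrected remainder handling (divide_elementwise is an illustrative name, std::vector standing in for the flat Tensor buffer):

#include <cstddef>
#include <vector>

std::vector<float> divide_elementwise(const std::vector<float> &a,
                                      const std::vector<float> &b) {
  const std::size_t len = a.size();
  std::vector<float> result(len);
  std::size_t i = 0;
  for (; i + 4 <= len; i += 4) { // main body, four elements per step
    result[i] = a[i] / b[i];
    result[i + 1] = a[i + 1] / b[i + 1];
    result[i + 2] = a[i + 2] / b[i + 2];
    result[i + 3] = a[i + 3] / b[i + 3];
  }
  for (std::size_t j = i; j < len; ++j) // remainder starts at i, not i - 1
    result[j] = a[j] / b[j];
  return result;
}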