namespace nntrainer {
-AttentionLayer::AttentionLayer() : sm(ActivationType::ACT_SOFTMAX) {
+AttentionLayer::AttentionLayer() {
+
wt_idx.fill(std::numeric_limits<unsigned>::max());
}
false, TensorLifespan::ITERATION_LIFESPAN);
context.setOutputDimensions({query_dim});
+
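+ /** bind the softmax implementation to the activation data type resolved
+  * at finalize time (FP32 or FP16) via the templated setActiFunc call */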
+ auto data_type = context.getActivationDataType();
+ if (data_type == ml::train::TensorDim::DataType::FP32) {
+   sm.setActiFunc<float>(ActivationType::ACT_SOFTMAX);
+ } else if (data_type == ml::train::TensorDim::DataType::FP16) {
+   sm.setActiFunc<_FP16>(ActivationType::ACT_SOFTMAX);
+ }
}
void AttentionLayer::forwarding(RunLayerContext &context, bool training) {
Tensor &weights = context.getTensor(wt_idx[AttentionParams::weights]);
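+ /** dweight is constructed with the same TensorType (format and data type)
+  * as the forward weights, so fp16 runs keep this intermediate in fp16 */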
Tensor dweight = Tensor(
- TensorDim({derivative.batch(), 1, derivative.height(), value.height()}));
+ TensorDim({derivative.batch(), 1, derivative.height(), value.height()},
+ weights.getTensorType()));
/** derivative for dot 2 */
dweight.dot_batched_deriv_wrt_1(value, derivative);
(d1 != 0 && float_eq(d2, 0)),
1);
}
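+ /** fp16 comparison: cosine similarity and mean squared error between the
+  * produced tensor and the answer must fall within epsilon */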
- const float epsilon = 1e-4;
+ const float epsilon = 1e-3;
auto tensor = t1.clone();
auto answer = t2.clone();
auto mean_squared_error = mse<_FP16>(
  tensor.getData<_FP16>(), answer.getData<_FP16>(), tensor.size());
- EXPECT_IN_RANGE(cos_sim, 0.99, 1);
+ EXPECT_IN_RANGE(cos_sim, 1 - epsilon, 1 + epsilon);
EXPECT_IN_RANGE(mean_squared_error, 0, epsilon);
return (weak_match == total);
::testing::Values(attention_shared_kv,
attention_shared_kv_batched,
attention_batched));
+
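+/** fp16 variants of the attention golden tests registered above, run with
+ * fp16 tensor types against their own golden files */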
+auto attention_shared_kv_fp16fp16 = LayerGoldenTestParamType(
+ nntrainer::createLayer<nntrainer::AttentionLayer>, {}, "1:1:5:7,1:1:3:7",
+ "attention_shared_kv_fp16fp16.nnlayergolden",
+ LayerGoldenTestParamOptions::DEFAULT, "nchw", "fp16", "fp16");
+
+auto attention_shared_kv_batched_fp16fp16 = LayerGoldenTestParamType(
+ nntrainer::createLayer<nntrainer::AttentionLayer>, {}, "2:1:5:7,2:1:3:7",
+ "attention_shared_kv_batched_fp16fp16.nnlayergolden",
+ LayerGoldenTestParamOptions::DEFAULT, "nchw", "fp16", "fp16");
+
+auto attention_batched_fp16fp16 = LayerGoldenTestParamType(
+ nntrainer::createLayer<nntrainer::AttentionLayer>, {},
+ "2:1:5:7,2:1:3:7,2:1:3:7", "attention_batched_fp16fp16.nnlayergolden",
+ LayerGoldenTestParamOptions::DEFAULT, "nchw", "fp16", "fp16");
+
+GTEST_PARAMETER_TEST(Attention16, LayerGoldenTest,
+                     ::testing::Values(attention_shared_kv_fp16fp16,
+                                       attention_shared_kv_batched_fp16fp16,
+                                       attention_batched_fp16fp16));