From e0090257a0c88c1a1e16f92a88423de01ef231ce Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Wed, 23 May 2018 14:33:59 -0700
Subject: [PATCH] Add NNAPI delegation for EMBEDDING_LOOKUP, RNN, SVDF

PiperOrigin-RevId: 197790679
---
 tensorflow/contrib/lite/nnapi_delegate.cc | 158 +++++++++++++++++++++++-------
 tensorflow/contrib/lite/nnapi_delegate.h  |   8 ++
 2 files changed, 132 insertions(+), 34 deletions(-)

diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 107c84e6..eed57d4 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -155,7 +155,6 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter,
         nn_type, static_cast<uint32_t>(tensor->dims->size),
         reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
     CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
-
     // TODO(aselle): Based on Michael's suggestion, limiting this to read
     // only memory
     if (tensor->allocation_type == kTfLiteMmapRo) {
@@ -168,7 +167,12 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter,
         CHECK_NN(ANeuralNetworksModel_setOperandValue(
             nn_model, next_id, tensor->data.raw, tensor->bytes));
       }
+    } else if (tensor->bytes == 0) {
+      // These size-0 tensors are reserved as optional tensors.
+      CHECK_NN(
+          ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0));
     }
+
     ++next_id;
   }
   return next_id;
@@ -177,7 +181,9 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter,
 // Adds the operations and their parameters to the NN API model.
 // 'next-id' is the operand ID of the next operand of the model.
 void AddOpsAndParams(tflite::Interpreter* interpreter,
-                     ANeuralNetworksModel* nn_model, uint32_t next_id) {
+                     ANeuralNetworksModel* nn_model, uint32_t next_id,
+                     std::vector<int>* model_state_inputs,
+                     std::vector<int>* model_state_outputs) {
   for (size_t i = 0; i < interpreter->nodes_size(); i++) {
     const auto* node_and_registration = interpreter->node_and_registration(i);
     const TfLiteNode& node = node_and_registration->first;
@@ -188,6 +194,8 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
     // Add the parameters.
     std::vector<uint32_t> augmented_inputs(
         node.inputs->data, node.inputs->data + node.inputs->size);
+    std::vector<uint32_t> augmented_outputs(
+        node.outputs->data, node.outputs->data + node.outputs->size);
 
     auto add_scalar_int32 = [&nn_model, &augmented_inputs,
                              &next_id](int value) {
@@ -207,12 +215,23 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       augmented_inputs.push_back(next_id++);
     };
 
+    // Handle state tensors of RNN, LSTM, SVDF.
+    // For each state_out tensor, a corresponding state_in operand needs to be
+    // created for NNAPI.
     auto duplicate_state_tensor_float32 =
-        [interpreter, &nn_model, &augmented_inputs](int tensor_id) {
+        [interpreter, &nn_model, &next_id, &augmented_inputs,
+         &model_state_inputs, &model_state_outputs](int tensor_id) {
           const TfLiteTensor* tensor = interpreter->tensor(tensor_id);
-          CHECK_NN(ANeuralNetworksModel_setOperandValue(
-              nn_model, tensor_id, tensor->data.raw, tensor->bytes));
-          augmented_inputs.push_back(tensor_id);
+          ANeuralNetworksOperandType operand_type{
+              ANEURALNETWORKS_TENSOR_FLOAT32,
+              static_cast<uint32_t>(tensor->dims->size),
+              reinterpret_cast<uint32_t*>(tensor->dims->data),
+              tensor->params.scale, tensor->params.zero_point};
+          CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
+          augmented_inputs.push_back(next_id);
+          model_state_inputs->push_back(next_id);
+          model_state_outputs->push_back(tensor_id);
+          next_id++;
         };
 
     auto add_add_params = [&add_scalar_int32]() { add_scalar_int32(0); };
@@ -275,28 +294,51 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       add_scalar_float32(builtin->proj_clip);
     };
 
+    // LSTM in NNAPI requires a scratch tensor as an output operand.
+    auto add_lstm_scratch_tensor_float32 = [interpreter, &node, &nn_model,
+                                            &next_id, &augmented_outputs]() {
+      int scratch_buffer_index = node.temporaries->data[0];
+      const TfLiteTensor* tensor = interpreter->tensor(scratch_buffer_index);
+      ANeuralNetworksOperandType operand_type{
+          ANEURALNETWORKS_TENSOR_FLOAT32,
+          static_cast<uint32_t>(tensor->dims->size),
+          reinterpret_cast<uint32_t*>(tensor->dims->data), tensor->params.scale,
+          tensor->params.zero_point};
+      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
+      augmented_outputs.insert(augmented_outputs.begin(), next_id++);
+    };
+
     auto add_mean_params = [&add_scalar_int32](void* data) {
       auto builtin = reinterpret_cast<TfLiteMeanParams*>(data);
       add_scalar_int32(builtin->keep_dims);
     };
 
-#if 0
-    auto add_reshape_params = [&](void* data) {
-      auto builtin = reinterpret_cast<TfLiteReshapeParams*>(data);
-      uint32_t tensor_size_shape = builtin->num_dimensions;
-      ANeuralNetworksOperandType operand_type{
-          ANEURALNETWORKS_TENSOR_INT32,
-          {static_cast<uint32_t>(1),
-           reinterpret_cast<uint32_t*>(&tensor_size_shape)},
-          0,
-          0};
-      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
-      CHECK_NN(ANeuralNetworksModel_setOperandValue(
-          nn_model, next_id, builtin->shape,
-          sizeof(int) * builtin->num_dimensions));
-      augmented_inputs.push_back(next_id++);
+    auto add_svdf_params = [&add_scalar_int32](void* data) {
+      auto builtin = reinterpret_cast<TfLiteSVDFParams*>(data);
+      add_scalar_int32(builtin->rank);
+      add_scalar_int32(builtin->activation);
     };
-#endif
+
+    auto add_rnn_params = [&add_scalar_int32](void* data) {
+      auto builtin = reinterpret_cast<TfLiteRNNParams*>(data);
+      add_scalar_int32(builtin->activation);
+    };
+
+    // Handle optional input tensors.
+    auto add_optional_tensors = [&nn_model, &augmented_inputs,
+                                 &next_id](int nn_type) {
+      for (size_t idx = 0; idx < augmented_inputs.size(); idx++) {
+        if (augmented_inputs[idx] == kOptionalTensor) {
+          const std::vector<uint32_t> dim = {0, 0};
+          ANeuralNetworksOperandType operand_type{nn_type, 2, dim.data(), 0, 0};
+          CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
+          CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id,
+                                                        nullptr, 0))
+          augmented_inputs[idx] = next_id++;
+        }
+      }
+    };
+
     int nnapi_version = 10;
     ANeuralNetworksOperationType nn_op_type;
@@ -366,13 +408,31 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       break;
     case tflite::BuiltinOperator_LSTM: {
       duplicate_state_tensor_float32(
-          node.outputs->data[/*kOutputStateTensor*/ 1]);
+          node.outputs->data[/*kOutputStateTensor*/ 0]);
       duplicate_state_tensor_float32(
-          node.outputs->data[/*kCellStateTensor*/ 2]);
+          node.outputs->data[/*kCellStateTensor*/ 1]);
       add_lstm_params(node.builtin_data);
+      add_lstm_scratch_tensor_float32();
+      add_optional_tensors(ANEURALNETWORKS_TENSOR_FLOAT32);
       nn_op_type = ANEURALNETWORKS_LSTM;
       break;
     }
+    case tflite::BuiltinOperator_SVDF: {
+      duplicate_state_tensor_float32(node.outputs->data[/*kStateTensor*/ 0]);
+      add_svdf_params(node.builtin_data);
+      nn_op_type = ANEURALNETWORKS_SVDF;
+      break;
+    }
+    case tflite::BuiltinOperator_RNN: {
+      duplicate_state_tensor_float32(
+          node.outputs->data[/*kHiddenStateTensor*/ 0]);
+      add_rnn_params(node.builtin_data);
+      nn_op_type = ANEURALNETWORKS_RNN;
+      break;
+    }
+    case tflite::BuiltinOperator_EMBEDDING_LOOKUP:
+      nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
+      break;
     case tflite::BuiltinOperator_PAD:
       nnapi_version = 11;  // require NNAPI 1.1
       nn_op_type = ANEURALNETWORKS_PAD;
@@ -392,12 +452,9 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       break;
     case tflite::BuiltinOperator_CONCAT_EMBEDDINGS:
     case tflite::BuiltinOperator_LSH_PROJECTION:
-    case tflite::BuiltinOperator_SVDF:
     case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
-    case tflite::BuiltinOperator_RNN:
     case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN:
     case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN:
-    case tflite::BuiltinOperator_EMBEDDING_LOOKUP:
     case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE:
     case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM:
     case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
@@ -450,8 +507,9 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
     // Add the operation.
     CHECK_NN(ANeuralNetworksModel_addOperation(
         nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()),
-        augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
-        reinterpret_cast<uint32_t*>(node.outputs->data)));
+        augmented_inputs.data(),
+        static_cast<uint32_t>(augmented_outputs.size()),
+        reinterpret_cast<uint32_t*>(augmented_outputs.data())));
   }
 }
 
@@ -475,12 +533,25 @@ TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) {
     }
     uint32_t next_id = addTensorOperands(interpreter, nn_model_, skip_list);
-    AddOpsAndParams(interpreter, nn_model_, next_id);
+    AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_,
+                    &model_states_outputs_);
+
+    std::vector<int> augmented_inputs = interpreter->inputs();
+    std::vector<int> augmented_outputs = interpreter->outputs();
+
+    // All state input/output tensors need to be treated as model inputs/outputs.
+    augmented_inputs.insert(augmented_inputs.end(),
+                            model_states_inputs_.begin(),
+                            model_states_inputs_.end());
+    augmented_outputs.insert(augmented_outputs.end(),
+                             model_states_outputs_.begin(),
+                             model_states_outputs_.end());
+
     CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs(
-        nn_model_, static_cast<uint32_t>(interpreter->inputs().size()),
-        reinterpret_cast<const uint32_t*>(interpreter->inputs().data()),
-        static_cast<uint32_t>(interpreter->outputs().size()),
-        reinterpret_cast<const uint32_t*>(interpreter->outputs().data())));
+        nn_model_, static_cast<uint32_t>(augmented_inputs.size()),
+        reinterpret_cast<const uint32_t*>(augmented_inputs.data()),
+        static_cast<uint32_t>(augmented_outputs.size()),
+        reinterpret_cast<const uint32_t*>(augmented_outputs.data())));
     CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
   }
   if (!nn_compiled_model_) {
@@ -507,6 +578,7 @@ TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) {
     CHECK_NN(ANeuralNetworksExecution_setInput(
         execution, i, nullptr, tensor->data.raw, tensor->bytes));
   }
+
   // Tell nn api where to place final data.
   for (size_t i = 0; i < interpreter->outputs().size(); i++) {
     int output = interpreter->outputs()[i];
@@ -514,6 +586,24 @@ TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) {
     CHECK_NN(ANeuralNetworksExecution_setOutput(
         execution, i, nullptr, tensor->data.raw, tensor->bytes));
   }
+
+  // The state_out of the previous invocation needs to be mapped to the
+  // state_in of the current invocation.
+  for (size_t i = 0; i < model_states_outputs_.size(); i++) {
+    int state_tensor_idx = model_states_outputs_[i];
+    TfLiteTensor* tensor = interpreter->tensor(state_tensor_idx);
+    // Here we use a deep copy for state_in tensors so that we are not
+    // reading and writing into the same buffer during an invocation.
+    // TODO(miaowang): use a double shared buffer to minimize the copies.
+    CHECK_NN(ANeuralNetworksExecution_setInput(
+        execution, i + interpreter->inputs().size(), nullptr, tensor->data.raw,
+        tensor->bytes));
+    // Tell NNAPI where to output the state_out.
+    CHECK_NN(ANeuralNetworksExecution_setOutput(
+        execution, i + interpreter->outputs().size(), nullptr, tensor->data.raw,
+        tensor->bytes));
+  }
+
   // Currently use blocking compute.
   ANeuralNetworksEvent* event = nullptr;
   CHECK_NN(ANeuralNetworksExecution_startCompute(execution, &event));
diff --git a/tensorflow/contrib/lite/nnapi_delegate.h b/tensorflow/contrib/lite/nnapi_delegate.h
index e980009..94dea4f 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.h
+++ b/tensorflow/contrib/lite/nnapi_delegate.h
@@ -59,6 +59,14 @@ class NNAPIDelegate {
   ANeuralNetworksModel* nn_model_ = nullptr;
   // The NN API compilation handle
   ANeuralNetworksCompilation* nn_compiled_model_ = nullptr;
+
+  // Lists of state tensors for LSTM, RNN, SVDF.
+  // NN API does not allow ops to maintain state across multiple
+  // invocations. We need to manually create state input tensors from the
+  // corresponding state output tensors of the TFLite operations, and map
+  // them correctly.
+  std::vector<int> model_states_inputs_;
+  std::vector<int> model_states_outputs_;
 };
 
 }  // namespace tflite
-- 
2.7.4
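
Usage sketch (editor's illustration, not part of the patch): the net effect of
the state-tensor plumbing above is that recurrent state survives across
Invoke() calls, because each state tensor is bound as both an extra NNAPI input
(the previous state) and an extra NNAPI output (the new state). The sketch
below shows how a contrib/lite client of this era might drive such a model
through the delegate; the file name "stateful_rnn.tflite", the single-input
assumption, and the UseNNAPI()-based setup are assumptions for illustration
only, not something the patch itself defines.

    // Illustrative only; model path and tensor layout are hypothetical.
    #include <cstring>
    #include <memory>
    #include <vector>

    #include "tensorflow/contrib/lite/interpreter.h"
    #include "tensorflow/contrib/lite/kernels/register.h"
    #include "tensorflow/contrib/lite/model.h"

    int main() {
      // Load a model containing an RNN/LSTM/SVDF op (hypothetical file).
      auto model = tflite::FlatBufferModel::BuildFromFile("stateful_rnn.tflite");
      if (!model) return 1;

      tflite::ops::builtin::BuiltinOpResolver resolver;
      std::unique_ptr<tflite::Interpreter> interpreter;
      tflite::InterpreterBuilder(*model, resolver)(&interpreter);
      if (!interpreter) return 1;

      // Route execution through NNAPI; with this patch the delegate also
      // accepts graphs containing EMBEDDING_LOOKUP, RNN and SVDF.
      interpreter->UseNNAPI(true);
      interpreter->AllocateTensors();

      const int input_index = interpreter->inputs()[0];
      const std::vector<std::vector<float>> timesteps = {/* per-step features */};

      for (const auto& step : timesteps) {
        float* input = interpreter->typed_tensor<float>(input_index);
        std::memcpy(input, step.data(), step.size() * sizeof(float));
        // Each Invoke() feeds the previous state_out back in as state_in
        // (via the extra NNAPI operands added by this patch), so hidden
        // state carries over from one timestep to the next.
        interpreter->Invoke();
      }
      return 0;
    }

The per-call deep copy called out in the TODO(miaowang) comment trades an extra
copy for correctness: without it, the state_in and state_out operands would
alias the same TfLite buffer while a single NNAPI execution is reading and
writing it.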