From: Sanggyu Lee/Motion Control Lab(SR)/Principal Engineer/Samsung Electronics
Date: Mon, 26 Nov 2018 05:56:12 +0000 (+0900)
Subject: tflite_benchmark_model is updated to v1.12.0. (#3660)
X-Git-Tag: 0.3~331
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9fe5ce4d9d386e50e529e52b09bb4951d887a7b1;p=platform%2Fcore%2Fml%2Fnnfw.git

tflite_benchmark_model is updated to v1.12.0. (#3660)

Most files are unchanged from v1.12.0. My modification supports operators
that expand into multiple kernels. You can find the changes in
stats_calculator.cc and profile_summarizer.cc.

Signed-off-by: Sanggyu Lee
---

diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index c6f5b5b..65c9cd9 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -37,15 +37,6 @@ list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
 
 list(APPEND TFLITE_SRCS "${TFLITE_DEPEND_DIR}/farmhash/src/farmhash.cc")
 
-# Profiling
-if(BUILD_TFLITE_BENCHMARK_MODEL)
-  file(GLOB TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/*.cc")
-  file(GLOB TFLITE_PROFILING_TESTS "${TENSORFLOW_LITE_BASE}/profiling/*test*.cc")
-  list(REMOVE_ITEM TFLITE_PROFILING_SRCS ${TFLITE_PROFILING_TESTS})
-  list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_BASE}/tensorflow/core/util/stats_calculator.cc")
-  list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-endif()
-
 list(APPEND TFLITE_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow")
 list(APPEND TFLITE_INCLUDES "${TFLITE_DEPEND_DIR}/absl")
 list(APPEND TFLITE_INCLUDES "${TFLITE_DEPEND_DIR}/gemmlowp")
diff --git a/include/util/profiling/time.h b/include/util/profiling/time.h
index da32893..cc2ec31 100644
--- a/include/util/profiling/time.h
+++ b/include/util/profiling/time.h
@@ -1,19 +1,3 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/libs/util/src/profiling/.FORMATDENY b/libs/util/src/profiling/.FORMATDENY
new file mode 100644
index 0000000..e69de29
diff --git a/libs/util/src/profiling/time.cc b/libs/util/src/profiling/time.cc
index 6fe1b54..6841c71 100644
--- a/libs/util/src/profiling/time.cc
+++ b/libs/util/src/profiling/time.cc
@@ -1,19 +1,3 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,20 +14,34 @@ limitations under the License.
 ==============================================================================*/
 
 #include "util/profiling/time.h"
+#if defined(_MSC_VER)
+#include <chrono>  // NOLINT(build/c++11)
+#else
 #include <sys/time.h>
+#endif
+
+namespace tflite {
+namespace profiling {
+namespace time {
+
+#if defined(_MSC_VER)
+
+uint64_t NowMicros() {
+  return std::chrono::duration_cast<std::chrono::microseconds>(
+             std::chrono::system_clock::now().time_since_epoch())
+      .count();
+}
+
+#else
 
-namespace tflite
-{
-namespace profiling
-{
-namespace time
-{
-uint64_t NowMicros()
-{
+uint64_t NowMicros() {
   struct timeval tv;
   gettimeofday(&tv, nullptr);
   return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
 }
-} // namespace time
-} // namespace profiling
-} // namespace tflite
+
+#endif  // defined(_MSC_VER)
+
+}  // namespace time
+}  // namespace profiling
+}  // namespace tflite
diff --git a/tools/tflite_benchmark_model/CMakeLists.txt b/tools/tflite_benchmark_model/CMakeLists.txt
index d526904..dd54dc5 100644
--- a/tools/tflite_benchmark_model/CMakeLists.txt
+++ b/tools/tflite_benchmark_model/CMakeLists.txt
@@ -1,5 +1,12 @@
 file(GLOB_RECURSE SOURCES "*.cc")
 
+nnfw_find_package(TensorFlowSource REQUIRED)
+set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/contrib/lite")
+list(APPEND SOURCES "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_main.cc"
+                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_model.cc"
+                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_params.cc"
+                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/command_line_flags.cc")
+
 add_executable(tflite_benchmark_model ${SOURCES})
 target_compile_definitions(tflite_benchmark_model PUBLIC "TFLITE_PROFILING_ENABLED")
 target_link_libraries(tflite_benchmark_model tensorflow-lite ${LIB_PTHREAD} dl nnfw_util nnfw_support_tflite)
diff --git a/tools/tflite_benchmark_model/README.md b/tools/tflite_benchmark_model/README.md
index 9376930..8d99763 100644
--- a/tools/tflite_benchmark_model/README.md
+++ b/tools/tflite_benchmark_model/README.md
@@ -9,7 +9,7 @@ of runs. Aggregate latency statistics are reported after running the
 benchmark.
 
 The instructions below are for running the binary on Desktop and Android;
 for iOS, please use the
-[iOS benchmark app] (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios).
+[iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios).
 
 ## Parameters
 
@@ -17,11 +17,6 @@ The binary takes the following required parameters:
 
 * `graph`: `string` \
     The path to the TFLite model file.
-* `input_layer`: `string` \
-    The name of the input layer; this is typically the first layer of the model.
-* `input_layer_shape`: `string` \
-    The shape of the input layer. This is a comma-separated string of the shape
-    of the input layer's tensor.
 
 and the following optional parameters:
 
@@ -29,11 +24,13 @@ and the following optional parameters:
     The number of threads to use for running the TFLite interpreter.
 * `warmup_runs`: `int` (default=1) \
     The number of warmup runs to do before starting the benchmark.
+* `num_runs`: `int` (default=50) \
+    The number of runs. Increase this to reduce variance.
 * `run_delay`: `float` (default=-1.0) \
     The delay in seconds between subsequent benchmark runs. Non-positive values
     mean use no delay.
 * `use_nnapi`: `bool` (default=false) \
-    Whether to use [Android NNAPI] (https://developer.android.com/ndk/guides/neuralnetworks/).
+ Whether to use [Android NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/). This API is available on recent Android devices. ## To build/install/run @@ -75,8 +72,6 @@ adb push mobilenet_quant_v1_224.tflite /data/local/tmp ``` adb shell /data/local/tmp/benchmark_model \ --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \ - --input_layer="input" \ - --input_layer_shape="1,224,224,3" \ --num_threads=4 ``` @@ -93,13 +88,10 @@ For example: ``` bazel-bin/tensorflow/contrib/lite/tools/benchmark/benchmark_model \ --graph=mobilenet_quant_v1_224.tflite \ - --input_layer="Placeholder" \ - --input_layer_shape="1,224,224,3" \ --num_threads=4 ``` -The MobileNet graph used as an example here may be downloaded from -https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip +The MobileNet graph used as an example here may be downloaded from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip). ## Reducing variance between runs on Android. @@ -115,10 +107,8 @@ E.g. for running the benchmark on big cores on Pixel 2 with a single thread one can use the following command: ``` -adb shell tasket f0 /data/local/tmp/benchmark_model \ +adb shell taskset f0 /data/local/tmp/benchmark_model \ --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \ - --input_layer="input" \ - --input_layer_shape="1,224,224,3" \ --num_threads=1 ``` @@ -205,5 +195,3 @@ Memory (bytes): count=0 Average inference timings in us: Warmup: 83235, Init: 38467, no stats: 79760.9 ``` - - diff --git a/tools/tflite_benchmark_model/benchmark_main.cc b/tools/tflite_benchmark_model/benchmark_main.cc deleted file mode 100644 index 7e4231c..0000000 --- a/tools/tflite_benchmark_model/benchmark_main.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "benchmark_tflite_model.h" -#include "logging.h" - -namespace nnfw { -namespace benchmark { - -int Main(int argc, char** argv) { -#ifdef TFLITE_CUSTOM_OPS_HEADER - TFLITE_LOG(INFO) << "STARTING with custom ops!"; -#else - TFLITE_LOG(INFO) << "STARTING!"; -#endif - BenchmarkTfLiteModel benchmark; - BenchmarkLoggingListener listener; - benchmark.AddListener(&listener); - benchmark.Run(argc, argv); - return 0; -} -} // namespace benchmark -} // namespace nnfw - -int main(int argc, char** argv) { return nnfw::benchmark::Main(argc, argv); } diff --git a/tools/tflite_benchmark_model/benchmark_model.cc b/tools/tflite_benchmark_model/benchmark_model.cc deleted file mode 100644 index 7869180..0000000 --- a/tools/tflite_benchmark_model/benchmark_model.cc +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "benchmark_model.h" - -#include - -#include -#include - -#include "tensorflow/contrib/lite/profiling/time.h" -#include "logging.h" - -namespace { -void SleepForSeconds(double sleep_seconds) { - if (sleep_seconds <= 0.0) { - return; - } - // Convert the run_delay string into a timespec. - timespec req; - req.tv_sec = static_cast(sleep_seconds); - req.tv_nsec = (sleep_seconds - req.tv_sec) * 1000000000; - // If requested, sleep between runs for an arbitrary amount of time. - // This can be helpful to determine the effect of mobile processor - // scaling and thermal throttling. 
-#ifdef PLATFORM_WINDOWS - Sleep(sleep_seconds * 1000); -#else - nanosleep(&req, nullptr); -#endif -} - -} // namespace - -namespace nnfw { -namespace benchmark { -using tensorflow::Stat; - -BenchmarkParams BenchmarkModel::DefaultParams() { - BenchmarkParams params; - params.AddParam("num_runs", BenchmarkParam::Create(50)); - params.AddParam("run_delay", BenchmarkParam::Create(-1.0f)); - params.AddParam("num_threads", BenchmarkParam::Create(1)); - params.AddParam("benchmark_name", BenchmarkParam::Create("")); - params.AddParam("output_prefix", BenchmarkParam::Create("")); - params.AddParam("warmup_runs", BenchmarkParam::Create(1)); - return params; -} - -BenchmarkModel::BenchmarkModel() : params_(DefaultParams()) {} - -void BenchmarkLoggingListener::OnBenchmarkEnd(const BenchmarkResults &results) { - auto inference_us = results.inference_time_us(); - auto init_us = results.startup_latency_us(); - auto warmup_us = results.warmup_time_us(); - TFLITE_LOG(INFO) << "Average inference timings in us: " - << "Warmup: " << warmup_us.avg() << ", " - << "Init: " << init_us << ", " - << "no stats: " << inference_us.avg(); -} - -std::vector BenchmarkModel::GetFlags() { - return { - CreateFlag("num_runs", ¶ms_, "number of runs"), - CreateFlag("run_delay", ¶ms_, "delay between runs in seconds"), - CreateFlag("num_threads", ¶ms_, "number of threads"), - CreateFlag("benchmark_name", ¶ms_, "benchmark name"), - CreateFlag("output_prefix", ¶ms_, - "benchmark output prefix"), - CreateFlag("warmup_runs", ¶ms_, - "how many runs to initialize model"), - }; -} - -void BenchmarkModel::LogFlags() { - TFLITE_LOG(INFO) << "Num runs: [" << params_.Get("num_runs") << "]"; - TFLITE_LOG(INFO) << "Inter-run delay (seconds): [" - << params_.Get("run_delay") << "]"; - TFLITE_LOG(INFO) << "Num threads: [" << params_.Get("num_threads") - << "]"; - TFLITE_LOG(INFO) << "Benchmark name: [" - << params_.Get("benchmark_name") << "]"; - TFLITE_LOG(INFO) << "Output prefix: [" - << params_.Get("output_prefix") << "]"; - TFLITE_LOG(INFO) << "Warmup runs: [" << params_.Get("warmup_runs") - << "]"; -} - -Stat BenchmarkModel::Run(int num_times, RunType run_type) { - Stat run_stats; - TFLITE_LOG(INFO) << "Running benchmark for " << num_times << " iterations "; - for (int run = 0; run < num_times; run++) { - listeners_.OnSingleRunStart(run_type); - int64_t start_us = tflite::profiling::time::NowMicros(); - RunImpl(); - int64_t end_us = tflite::profiling::time::NowMicros(); - listeners_.OnSingleRunEnd(); - - run_stats.UpdateStat(end_us - start_us); - SleepForSeconds(params_.Get("run_delay")); - } - - std::stringstream stream; - run_stats.OutputToStream(&stream); - TFLITE_LOG(INFO) << stream.str() << std::endl; - - return run_stats; -} - -void BenchmarkModel::Run(int argc, char **argv) { - if (!ParseFlags(argc, argv)) { - return; - } - - LogFlags(); - - listeners_.OnBenchmarkStart(params_); - int64_t initialization_start_us = tflite::profiling::time::NowMicros(); - Init(); - int64_t initialization_end_us = tflite::profiling::time::NowMicros(); - int64_t startup_latency_us = initialization_end_us - initialization_start_us; - TFLITE_LOG(INFO) << "Initialized session in " << startup_latency_us / 1e3 - << "ms"; - - uint64_t input_bytes = ComputeInputBytes(); - Stat warmup_time_us = - Run(params_.Get("warmup_runs"), WARMUP); - Stat inference_time_us = - Run(params_.Get("num_runs"), REGULAR); - listeners_.OnBenchmarkEnd( - {startup_latency_us, input_bytes, warmup_time_us, inference_time_us}); -} - -bool BenchmarkModel::ParseFlags(int argc, char 
**argv) { - auto flag_list = GetFlags(); - const bool parse_result = - Flags::Parse(&argc, const_cast(argv), flag_list); - if (!parse_result) { - std::string usage = Flags::Usage(argv[0], flag_list); - TFLITE_LOG(ERROR) << usage; - return false; - } - return ValidateFlags(); -} - -} // namespace benchmark -} // namespace nnfw diff --git a/tools/tflite_benchmark_model/benchmark_model.h b/tools/tflite_benchmark_model/benchmark_model.h deleted file mode 100644 index 5645e29..0000000 --- a/tools/tflite_benchmark_model/benchmark_model.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef __TFLITE_BENCHMARK_MODEL_BENCHMARK_MODEL_H__ -#define __TFLITE_BENCHMARK_MODEL_BENCHMARK_MODEL_H__ - -#include -#include -#include -#include -#include -#include - -#include "benchmark_params.h" -#include "command_line_flags.h" -#include "tensorflow/core/util/stats_calculator.h" - -namespace nnfw { -namespace benchmark { - -enum RunType { - WARMUP, - REGULAR, -}; - -class BenchmarkResults { - public: - BenchmarkResults(int64_t startup_latency_us, uint64_t input_bytes, - tensorflow::Stat warmup_time_us, - tensorflow::Stat inference_time_us) - : startup_latency_us_(startup_latency_us), - input_bytes_(input_bytes), - warmup_time_us_(warmup_time_us), - inference_time_us_(inference_time_us) {} - - tensorflow::Stat inference_time_us() const { - return inference_time_us_; - } - tensorflow::Stat warmup_time_us() const { return warmup_time_us_; } - int64_t startup_latency_us() const { return startup_latency_us_; } - uint64_t input_bytes() const { return input_bytes_; } - double throughput_MB_per_second() const { - double bytes_per_sec = (input_bytes_ * inference_time_us_.count() * 1e6) / - inference_time_us_.sum(); - return bytes_per_sec / (1024.0 * 1024.0); - } - - private: - int64_t startup_latency_us_; - uint64_t input_bytes_; - tensorflow::Stat warmup_time_us_; - tensorflow::Stat inference_time_us_; -}; - -class BenchmarkListener { - public: - virtual void OnBenchmarkStart(const BenchmarkParams& params) {} - virtual void OnSingleRunStart(RunType runType) {} - virtual void OnSingleRunEnd() {} - virtual void OnBenchmarkEnd(const BenchmarkResults& results) {} - virtual ~BenchmarkListener() {} -}; - -// A listener that forwards its method calls to a collection of listeners. -class BenchmarkListeners : public BenchmarkListener { - public: - // Added a listener to the listener collection. - // |listener| is not owned by the instance of |BenchmarkListeners|. - // |listener| should not be null and should outlast the instance of - // |BenchmarkListeners|. - void AddListener(BenchmarkListener* listener) { - listeners_.push_back(listener); - } - - void OnBenchmarkStart(const BenchmarkParams& params) override { - for (auto listener : listeners_) { - listener->OnBenchmarkStart(params); - } - } - - void OnSingleRunStart(RunType runType) override { - for (auto listener : listeners_) { - listener->OnSingleRunStart(runType); - } - } - - void OnSingleRunEnd() override { - for (auto listener : listeners_) { - listener->OnSingleRunEnd(); - } - } - - void OnBenchmarkEnd(const BenchmarkResults& results) override { - for (auto listener : listeners_) { - listener->OnBenchmarkEnd(results); - } - } - - ~BenchmarkListeners() {} - - private: - // Use vector so listeners are invoked in the order they are added. - std::vector listeners_; -}; - -// Benchmark listener that just logs the results of benchmark run. -class BenchmarkLoggingListener : public BenchmarkListener { - void OnBenchmarkEnd(const BenchmarkResults& results) override; -}; - -template -Flag CreateFlag(const char* name, BenchmarkParams* params, - const std::string& usage) { - return Flag(name, [params, name](const T& val) { params->Set(name, val); }, - params->Get(name), usage); -} - -// Benchmarks a model. -// -// Subclasses need to implement initialization and running of the model. -// The results can be collected by adding BenchmarkListener(s). 
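
The comment above states the extension contract of this (now deleted) header: a concrete benchmark overrides `Init()`, `RunImpl()`, and `ComputeInputBytes()`, and observers are attached as `BenchmarkListener`s. A minimal sketch of how that contract is exercised; the `NoopBenchmark` class is hypothetical, and the wiring mirrors the deleted benchmark_main.cc:

```cpp
#include "benchmark_model.h"  // the deleted header shown around this sketch

namespace nnfw {
namespace benchmark {

// Hypothetical do-nothing benchmark, for illustration only.
class NoopBenchmark : public BenchmarkModel {
 public:
  void Init() override {}                              // load/prepare a model here
  uint64_t ComputeInputBytes() override { return 0; }  // bytes fed per inference
  void RunImpl() override {}                           // one inference per call
};

}  // namespace benchmark
}  // namespace nnfw

int main(int argc, char** argv) {
  nnfw::benchmark::NoopBenchmark benchmark;
  nnfw::benchmark::BenchmarkLoggingListener listener;
  benchmark.AddListener(&listener);  // prints average timings when the run ends
  benchmark.Run(argc, argv);         // parses --num_runs, --warmup_runs, etc.
  return 0;
}
```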
-class BenchmarkModel { - public: - static BenchmarkParams DefaultParams(); - BenchmarkModel(); - BenchmarkModel(BenchmarkParams params) : params_(std::move(params)) {} - virtual ~BenchmarkModel() {} - bool ParseFlags(int argc, char** argv); - virtual void Init() = 0; - void Run(int argc, char** argv); - void AddListener(BenchmarkListener* listener) { - listeners_.AddListener(listener); - } - - protected: - virtual void LogFlags(); - virtual bool ValidateFlags() { return true; } - virtual std::vector GetFlags(); - virtual uint64_t ComputeInputBytes() = 0; - virtual tensorflow::Stat Run(int num_times, RunType run_type); - virtual void RunImpl() = 0; - BenchmarkParams params_; - BenchmarkListeners listeners_; -}; - -} // namespace benchmark -} // namespace nnfw - -#endif //__TFLITE_BENCHMARK_MODEL_BENCHMARK_MODEL_H__ diff --git a/tools/tflite_benchmark_model/benchmark_params.cc b/tools/tflite_benchmark_model/benchmark_params.cc deleted file mode 100644 index 7b667a4..0000000 --- a/tools/tflite_benchmark_model/benchmark_params.cc +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "benchmark_params.h" - -#include -#include -#include - -#include "logging.h" - -namespace nnfw { -namespace benchmark { - -void BenchmarkParam::AssertHasSameType(BenchmarkParam::ParamType a, - BenchmarkParam::ParamType b) { - TFLITE_BENCHMARK_CHECK(a == b) << "Type mismatch while accessing parameter."; -} - -template <> -BenchmarkParam::ParamType BenchmarkParam::GetValueType() { - return BenchmarkParam::ParamType::TYPE_INT32; -} - -template <> -BenchmarkParam::ParamType BenchmarkParam::GetValueType() { - return BenchmarkParam::ParamType::TYPE_BOOL; -} - -template <> -BenchmarkParam::ParamType BenchmarkParam::GetValueType() { - return BenchmarkParam::ParamType::TYPE_FLOAT; -} - -template <> -BenchmarkParam::ParamType BenchmarkParam::GetValueType() { - return BenchmarkParam::ParamType::TYPE_STRING; -} - -void BenchmarkParams::AssertParamExists(const std::string& name) const { - TFLITE_BENCHMARK_CHECK(HasParam(name)) << name << " was not found."; -} - -} // namespace benchmark -} // namespace nnfw diff --git a/tools/tflite_benchmark_model/benchmark_params.h b/tools/tflite_benchmark_model/benchmark_params.h deleted file mode 100644 index 1ac3f4a..0000000 --- a/tools/tflite_benchmark_model/benchmark_params.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef __TFLITE_BENCHMARK_MODEL_BENCHMARK_PARAMS_H__ -#define __TFLITE_BENCHMARK_MODEL_BENCHMARK_PARAMS_H__ -#include -#include -#include -#include - -#include "logging.h" - -namespace nnfw { -namespace benchmark { - -template -class TypedBenchmarkParam; - -class BenchmarkParam { - protected: - enum class ParamType { TYPE_INT32, TYPE_FLOAT, TYPE_BOOL, TYPE_STRING }; - - public: - template - static std::unique_ptr Create(const T& default_value) { - return std::unique_ptr( - new TypedBenchmarkParam(default_value)); - } - - template - TypedBenchmarkParam* AsTyped() { - AssertHasSameType(GetValueType(), type_); - return static_cast*>(this); - } - virtual ~BenchmarkParam() {} - BenchmarkParam(ParamType type) : type_(type) {} - - private: - static void AssertHasSameType(ParamType a, ParamType b); - protected: - template - static ParamType GetValueType(); - - const ParamType type_; -}; - -template -class TypedBenchmarkParam : public BenchmarkParam { - public: - TypedBenchmarkParam(const T& value) - : BenchmarkParam(GetValueType()), value_(value) {} - void Set(const T& value) { value_ = value; } - - T Get() { return value_; } - - private: - T value_; -}; - -class BenchmarkParams { - public: - void AddParam(const std::string& name, - std::unique_ptr value) { - params_[name] = std::move(value); - } - - bool HasParam(const std::string& name) const { - return params_.find(name) != params_.end(); - } - - template - void Set(const std::string& name, const T& value) { - AssertParamExists(name); - params_.at(name)->AsTyped()->Set(value); - } - - template - T Get(const std::string& name) const { - AssertParamExists(name); - return params_.at(name)->AsTyped()->Get(); - } - - private: - void AssertParamExists(const std::string& name) const; - std::unordered_map> params_; -}; - -} // namespace benchmark -} // namespace nnfw -#endif // __TFLITE_BENCHMARK_MODEL_BENCHMARK_PARAMS_H__ diff --git a/tools/tflite_benchmark_model/benchmark_tflite_model.cc b/tools/tflite_benchmark_model/benchmark_tflite_model.cc index d277795..611bd6a 100644 --- a/tools/tflite_benchmark_model/benchmark_tflite_model.cc +++ b/tools/tflite_benchmark_model/benchmark_tflite_model.cc @@ -29,7 +29,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "benchmark_tflite_model.h" +#include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h" #include #include @@ -39,11 +39,16 @@ limitations under the License. #include #include +#ifdef TFLITE_FLEX +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" +#endif // TFLITE_FLEX #include "support/tflite/kernels/register.h" #include "tensorflow/contrib/lite/model.h" #include "tensorflow/contrib/lite/op_resolver.h" #include "tensorflow/contrib/lite/string_util.h" -#include "logging.h" +#include "tensorflow/contrib/lite/tools/benchmark/logging.h" + +// For profiling nnapi_delegate #include "util/profiling/profiling.h" #include "support/tflite/nnapi_delegate.h" @@ -51,7 +56,7 @@ limitations under the License. 
 void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
 #endif
 
-namespace nnfw {
+namespace tflite {
 namespace benchmark {
 
 void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
@@ -130,7 +135,7 @@ void FillRandomValue(T* ptr, const std::vector<int>& sizes,
 
 void FillRandomString(tflite::DynamicBuffer* buffer,
                       const std::vector<int>& sizes,
-                      const std::function<std::string()>& random_func) {
+                      const std::function<string()>& random_func) {
   int num_elements = 1;
   for (int dim : sizes) {
     num_elements *= dim;
@@ -142,7 +147,7 @@ void FillRandomString(tflite::DynamicBuffer* buffer,
 }
 
 bool PopulateInputLayerInfo(
-    const std::string& names_string, const std::string& shapes_string,
+    const string& names_string, const string& shapes_string,
     std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
   std::vector<std::string> names = Split(names_string, ',');
   std::vector<std::string> shapes = Split(shapes_string, ':');
@@ -216,8 +221,8 @@ std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
   return flags;
 }
 
-void BenchmarkTfLiteModel::LogFlags() {
-  BenchmarkModel::LogFlags();
+void BenchmarkTfLiteModel::LogParams() {
+  BenchmarkModel::LogParams();
   TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]";
   TFLITE_LOG(INFO) << "Input layers: ["
                    << params_.Get<std::string>("input_layer") << "]";
@@ -226,7 +231,7 @@
   TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]";
 }
 
-bool BenchmarkTfLiteModel::ValidateFlags() {
+bool BenchmarkTfLiteModel::ValidateParams() {
   if (params_.Get<std::string>("graph").empty()) {
     TFLITE_LOG(ERROR)
         << "Please specify the name of your TF Lite input file with --graph";
@@ -247,6 +252,46 @@ uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
   return total_input_bytes;
 }
 
+void BenchmarkTfLiteModel::PrepareInputsAndOutputs() {
+  auto interpreter_inputs = interpreter->inputs();
+  // Set the values of the input tensors.
+  for (int j = 0; j < inputs.size(); ++j) {
+    const InputLayerInfo& input = inputs[j];
+    int i = interpreter_inputs[j];
+    TfLiteTensor* t = interpreter->tensor(i);
+    std::vector<int> sizes = input.shape;
+
+    // TODO(ahentz): below we ignore the 0-th dimension (number of batches).
+    if (t->type == kTfLiteFloat32) {
+      FillRandomValue<float>(
+          interpreter->typed_tensor<float>(i),
+          std::vector<int>(sizes.begin() + 1, sizes.end()),
+          []() { return static_cast<float>(rand()) / RAND_MAX - 0.5f; });
+    } else if (t->type == kTfLiteInt32) {
+      // TODO(yunluli): This is currently only used for handling embedding input
+      // for speech models. Generalize if necessary.
+      FillRandomValue<int32_t>(
+          interpreter->typed_tensor<int32_t>(i),
+          std::vector<int>(sizes.begin() + 1, sizes.end()),
+          []() { return static_cast<int32_t>(rand()) % 100; });
+    } else if (t->type == kTfLiteUInt8) {
+      FillRandomValue<uint8_t>(
+          interpreter->typed_tensor<uint8_t>(i),
+          std::vector<int>(sizes.begin() + 1, sizes.end()),
+          []() { return static_cast<uint8_t>(rand()) % 255; });
+    } else if (t->type == kTfLiteString) {
+      tflite::DynamicBuffer buffer;
+      FillRandomString(&buffer, sizes, []() {
+        return "we're have some friends over saturday to hang out in the yard";
+      });
+      buffer.WriteToTensor(interpreter->tensor(i));
+    } else {
+      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
+                        << " of type " << t->type;
+    }
+  }
+}
+
 void BenchmarkTfLiteModel::Init() {
   std::string graph = params_.Get<std::string>("graph");
   model = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
@@ -269,7 +314,7 @@
     TFLITE_LOG(FATAL) << "Failed to construct interpreter";
   }
   profiling_listener_.SetInterpreter(interpreter.get());
-  profiling::Context::get().setProfiler(interpreter->GetProfiler());
+  ::profiling::Context::get().setProfiler(interpreter->GetProfiler());
 
   const int32_t num_threads = params_.Get<int32_t>("num_threads");
 
@@ -280,6 +325,16 @@
   bool use_nnapi = params_.Get<bool>("use_nnapi");
 
   interpreter->UseNNAPI(use_nnapi);
+
+#ifdef TFLITE_FLEX
+  TFLITE_LOG(INFO) << "Instantiating Flex Delegate";
+  delegate_ = FlexDelegate::Create();
+  if (delegate_) {
+    interpreter->ModifyGraphWithDelegate(delegate_.get(),
+                                         /*allow_dynamic_tensors=*/true);
+  }
+#endif  // TFLITE_FLEX
+
   auto interpreter_inputs = interpreter->inputs();
 
   if (!inputs.empty()) {
@@ -311,36 +366,6 @@
   if (interpreter->AllocateTensors() != kTfLiteOk) {
     TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
   }
-
-  // Set the values of the input tensors.
-  for (int j = 0; j < inputs.size(); ++j) {
-    const InputLayerInfo& input = inputs[j];
-    int i = interpreter_inputs[j];
-    TfLiteTensor* t = interpreter->tensor(i);
-    std::vector<int> sizes = input.shape;
-
-    // TODO(ahentz): below we ignore the 0-th dimension (number of batches).
-    if (t->type == kTfLiteFloat32) {
-      FillRandomValue<float>(
-          interpreter->typed_tensor<float>(i),
-          std::vector<int>(sizes.begin() + 1, sizes.end()),
-          []() { return static_cast<float>(rand()) / RAND_MAX - 0.5f; });
-    } else if (t->type == kTfLiteUInt8) {
-      FillRandomValue<uint8_t>(
-          interpreter->typed_tensor<uint8_t>(i),
-          std::vector<int>(sizes.begin() + 1, sizes.end()),
-          []() { return static_cast<uint8_t>(rand()) % 255; });
-    } else if (t->type == kTfLiteString) {
-      tflite::DynamicBuffer buffer;
-      FillRandomString(&buffer, sizes, []() {
-        return "we're have some friends over saturday to hang out in the yard";
-      });
-      buffer.WriteToTensor(interpreter->tensor(i));
-    } else {
-      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
-                        << " of type " << t->type;
-    }
-  }
 }
 
 void BenchmarkTfLiteModel::RunImpl() {
@@ -357,4 +382,4 @@
 }
 
 }  // namespace benchmark
-}  // namespace nnfw
+}  // namespace tflite
diff --git a/tools/tflite_benchmark_model/benchmark_tflite_model.h b/tools/tflite_benchmark_model/benchmark_tflite_model.h
deleted file mode 100644
index 7892de1..0000000
--- a/tools/tflite_benchmark_model/benchmark_tflite_model.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef __TFLITE_BENCHMARK_MODEL_BENCHMARK_TFLITE_MODEL_H__ -#define __TFLITE_BENCHMARK_MODEL_BENCHMARK_TFLITE_MODEL_H__ - -#include -#include -#include - -#include "tensorflow/contrib/lite/model.h" -#include "tensorflow/contrib/lite/profiling/profile_summarizer.h" -#include "benchmark_model.h" - -namespace nnfw { -namespace benchmark { - -// Dumps profiling events if profiling is enabled -class ProfilingListener : public BenchmarkListener { - public: - explicit ProfilingListener() : interpreter_(nullptr), has_profiles_(false) {} - - void SetInterpreter(tflite::Interpreter* interpreter); - - void OnSingleRunStart(RunType run_type) override; - - void OnSingleRunEnd() override; - - void OnBenchmarkEnd(const BenchmarkResults& results) override; - - private: - tflite::Interpreter* interpreter_; - tflite::profiling::Profiler profiler_; - tflite::profiling::ProfileSummarizer summarizer_; - bool has_profiles_; -}; - -// Benchmarks a TFLite model by running tflite interpreter. -class BenchmarkTfLiteModel : public BenchmarkModel { - public: - BenchmarkTfLiteModel(); - BenchmarkTfLiteModel(BenchmarkParams params); - - std::vector GetFlags() override; - void LogFlags() override; - bool ValidateFlags() override; - uint64_t ComputeInputBytes() override; - void Init() override; - void RunImpl() override; - virtual ~BenchmarkTfLiteModel() {} - - struct InputLayerInfo { - std::string name; - std::vector shape; - }; - - private: - std::unique_ptr model; - std::unique_ptr interpreter; - std::vector inputs; - ProfilingListener profiling_listener_; -}; - -} // namespace benchmark -} // namespace nnfw - -#endif //__TFLITE_BENCHMARK_MODEL_BENCHMARK_TFLITE_MODEL_H__ diff --git a/tools/tflite_benchmark_model/command_line_flags.cc b/tools/tflite_benchmark_model/command_line_flags.cc deleted file mode 100644 index eacca9f..0000000 --- a/tools/tflite_benchmark_model/command_line_flags.cc +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "command_line_flags.h" - -#include -#include -#include -#include -#include - -namespace nnfw { -namespace { - -template -std::string ToString(T val) { - std::ostringstream stream; - stream << val; - return stream.str(); -} - -bool ParseFlag(const std::string& arg, const std::string& flag, - const std::function& parse_func, - bool* value_parsing_ok) { - *value_parsing_ok = true; - std::string flag_prefix = "--" + flag + "="; - if (arg.find(flag_prefix) != 0) { - return false; - } - bool has_value = arg.size() >= flag_prefix.size(); - *value_parsing_ok = has_value; - if (has_value) { - *value_parsing_ok = parse_func(arg.substr(flag_prefix.size())); - } - return true; -} - -template -bool ParseFlag(const std::string& flag_value, - const std::function& hook) { - std::istringstream stream(flag_value); - T read_value; - stream >> read_value; - if (!stream.eof() && !stream.good()) { - return false; - } - hook(read_value); - return true; -} - -bool ParseBoolFlag(const std::string& flag_value, - const std::function& hook) { - if (flag_value != "true" && flag_value != "false") { - return false; - } - - hook(flag_value == "true"); - return true; -} -} // namespace - -Flag::Flag(const char* name, const std::function& hook, - int32_t default_value, const std::string& usage_text) - : name_(name), - type_(TYPE_INT32), - value_hook_([hook](const std::string& flag_value) { - return ParseFlag(flag_value, hook); - }), - default_for_display_(ToString(default_value)), - usage_text_(usage_text) {} - -Flag::Flag(const char* name, const std::function& hook, - int64_t default_value, const std::string& usage_text) - : name_(name), - type_(TYPE_INT64), - value_hook_([hook](const std::string& flag_value) { - return ParseFlag(flag_value, hook); - }), - default_for_display_(ToString(default_value)), - usage_text_(usage_text) {} - -Flag::Flag(const char* name, const std::function& hook, - float default_value, const std::string& usage_text) - : name_(name), - type_(TYPE_FLOAT), - value_hook_([hook](const std::string& flag_value) { - return ParseFlag(flag_value, hook); - }), - default_for_display_(ToString(default_value)), - usage_text_(usage_text) {} - -Flag::Flag(const char* name, const std::function& hook, - bool default_value, const std::string& usage_text) - : name_(name), - type_(TYPE_BOOL), - value_hook_([hook](const std::string& flag_value) { - return ParseBoolFlag(flag_value, hook); - }), - 
default_for_display_(default_value ? "true" : "false"), - usage_text_(usage_text) {} - -Flag::Flag(const char* name, - const std::function& hook, - const std::string& default_value, const std::string& usage_text) - : name_(name), - type_(TYPE_STRING), - value_hook_([hook](const std::string& flag_value) { - hook(flag_value); - return true; - }), - default_for_display_(default_value), - usage_text_(usage_text) {} - -bool Flag::Parse(const std::string& arg, bool* value_parsing_ok) const { - return ParseFlag(arg, name_, value_hook_, value_parsing_ok); -} - -std::string Flag::GetTypeName() const { - switch (type_) { - case TYPE_INT32: - return "int32"; - case TYPE_INT64: - return "int64"; - case TYPE_FLOAT: - return "float"; - case TYPE_BOOL: - return "bool"; - case TYPE_STRING: - return "string"; - } - - return "unknown"; -} - -/*static*/ bool Flags::Parse(int* argc, const char** argv, - const std::vector& flag_list) { - bool result = true; - std::vector unknown_flags; - for (int i = 1; i < *argc; ++i) { - if (std::string(argv[i]) == "--") { - while (i < *argc) { - unknown_flags.push_back(argv[i]); - ++i; - } - break; - } - - bool was_found = false; - for (const Flag& flag : flag_list) { - bool value_parsing_ok; - was_found = flag.Parse(argv[i], &value_parsing_ok); - if (!value_parsing_ok) { - result = false; - } - if (was_found) { - break; - } - } - if (!was_found) { - unknown_flags.push_back(argv[i]); - } - } - int dst = 1; // Skip argv[0] - for (auto f : unknown_flags) { - argv[dst++] = f; - } - argv[dst++] = nullptr; - *argc = unknown_flags.size() + 1; - return result && (*argc < 2 || std::strcmp(argv[1], "--help") != 0); -} - -/*static*/ std::string Flags::Usage(const std::string& cmdline, - const std::vector& flag_list) { - std::ostringstream usage_text; - usage_text << "usage: " << cmdline << "\n"; - if (!flag_list.empty()) { - usage_text << "Flags:\n"; - } - - for (const Flag& flag : flag_list) { - auto type_name = flag.GetTypeName(); - usage_text << "\t"; - usage_text << "--" << flag.name_ << "=" << flag.default_for_display_; - usage_text << "\t" << type_name << "\t" << flag.usage_text_ << "\n"; - } - return usage_text.str(); -} - -} // namespace nnfw diff --git a/tools/tflite_benchmark_model/command_line_flags.h b/tools/tflite_benchmark_model/command_line_flags.h deleted file mode 100644 index 766417d..0000000 --- a/tools/tflite_benchmark_model/command_line_flags.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef __TFLITE_BENCHMARK_MODEL_COMMAND_LINE_FLAGS_H__ -#define __TFLITE_BENCHMARK_MODEL_COMMAND_LINE_FLAGS_H__ - -#include -#include -#include - -namespace nnfw { -// A simple command-line argument parsing module. -// Dependency free simplified port of core/util/command_line_flags. -// This class is written for benchmarks and uses inefficient string -// concatenation. This was written to avoid dependency on tensorflow/core/util -// which transitively brings in a lot of other dependencies that are not -// necessary for tflite benchmarking code. -// The recommended way of using it is with local variables and an initializer -// list of Flag objects, for example: -// -// int some_int = 10; -// bool some_switch = false; -// std::string some_name = "something"; -// -// std::vector flag_list = { -// Flag::CreateFlag("some_int", &some_int, "an integer that affects X"), -// Flag::CreateFlag("some_switch", &some_switch, "a bool that affects Y"), -// Flag::CreateFlag("some_name", &some_name, "a string that affects Z") -// }; -// // Get usage message before ParseFlags() to capture default values. -// std::string usage = Flag::Usage(argv[0], flag_list); -// bool parsed_values_ok = Flags::Parse(&argc, argv, flag_list); -// -// tensorflow::port::InitMain(usage.c_str(), &argc, &argv); -// if (argc != 1 || !parsed_values_ok) { -// ...output usage and error message... -// } -// -// The argc and argv values are adjusted by the Parse function so all that -// remains is the program name (at argv[0]) and any unknown arguments fill the -// rest of the array. This means you can check for flags that weren't understood -// by seeing if argv is greater than 1. -// The result indicates if there were any errors parsing the values that were -// passed to the command-line switches. For example, --some_int=foo would return -// false because the argument is expected to be an integer. -// -// NOTE: Unlike gflags-style libraries, this library is intended to be -// used in the `main()` function of your binary. It does not handle -// flag definitions that are scattered around the source code. - -// A description of a single command line flag, holding its name, type, usage -// text, and a pointer to the corresponding variable. 
-class Flag { - public: - template - static Flag CreateFlag(const char* name, T* val, const char* usage) { - return Flag(name, [val](const T& v) { *val = v; }, *val, usage); - } - - Flag(const char* name, const std::function& hook, - int32_t default_value, const std::string& usage_text); - Flag(const char* name, const std::function& hook, - int64_t default_value, const std::string& usage_text); - Flag(const char* name, const std::function& hook, - float default_value, const std::string& usage_text); - Flag(const char* name, const std::function& hook, - bool default_value, const std::string& usage_text); - Flag(const char* name, const std::function& hook, - const std::string& default_value, const std::string& usage_text); - - private: - friend class Flags; - - bool Parse(const std::string& arg, bool* value_parsing_ok) const; - - std::string name_; - enum { - TYPE_INT32, - TYPE_INT64, - TYPE_BOOL, - TYPE_STRING, - TYPE_FLOAT, - } type_; - - std::string GetTypeName() const; - - std::function value_hook_; - std::string default_for_display_; - - std::string usage_text_; -}; - -class Flags { - public: - // Parse the command line represented by argv[0, ..., (*argc)-1] to find flag - // instances matching flags in flaglist[]. Update the variables associated - // with matching flags, and remove the matching arguments from (*argc, argv). - // Return true iff all recognized flag values were parsed correctly, and the - // first remaining argument is not "--help". - static bool Parse(int* argc, const char** argv, - const std::vector& flag_list); - - // Return a usage message with command line cmdline, and the - // usage_text strings in flag_list[]. - static std::string Usage(const std::string& cmdline, - const std::vector& flag_list); -}; - -} // namespace nnfw - -#endif // __TFLITE_BENCHMARK_MODEL_COMMAND_LINE_FLAGS_H__ - - diff --git a/tools/tflite_benchmark_model/logging.h b/tools/tflite_benchmark_model/logging.h deleted file mode 100644 index e694a09..0000000 --- a/tools/tflite_benchmark_model/logging.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef __TFLITE_BENCHMARK_MODEL_LOGGING_H_ -#define __TFLITE_BENCHMARK_MODEL_LOGGING_H_ - -// LOG and CHECK macros for benchmarks. 
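
The header deleted below provided the stream-style logging used across these sources: `TFLITE_LOG` writes the streamed message to stderr when the temporary `LoggingWrapper` is destroyed, and `TFLITE_BENCHMARK_CHECK` logs and aborts only when its condition is false. A short usage sketch mirroring call sites in benchmark_model.cc and benchmark_params.cc:

```cpp
#include "logging.h"  // the deleted header shown below

void LogExample(int num_runs) {
  TFLITE_LOG(INFO) << "Running benchmark for " << num_runs << " iterations";
  // Streams the message and aborts the process when the condition is false.
  TFLITE_BENCHMARK_CHECK(num_runs > 0) << "num_runs must be positive.";
}
```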
-
-#include <cstdlib>
-#include <iostream>
-#include <sstream>
-
-namespace nnfw {
-namespace logging {
-// A wrapper that logs to stderr.
-//
-// Used for TFLITE_LOG and TFLITE_BENCHMARK_CHECK macros.
-class LoggingWrapper {
- public:
-  enum class LogSeverity : int {
-    INFO = 0,
-    WARN = 1,
-    ERROR = 2,
-    FATAL = 3,
-  };
-  LoggingWrapper(LogSeverity severity)
-      : severity_(severity), should_log_(true) {}
-  LoggingWrapper(LogSeverity severity, bool log)
-      : severity_(severity), should_log_(log) {}
-  std::stringstream& Stream() { return stream_; }
-  ~LoggingWrapper() {
-    if (should_log_) {
-      std::cerr << stream_.str() << std::endl;
-      if (severity_ == LogSeverity::FATAL) {
-        std::flush(std::cerr);
-        std::abort();
-      }
-    }
-  }
-
- private:
-  std::stringstream stream_;
-  LogSeverity severity_;
-  bool should_log_;
-};
-
-} // namespace logging
-
-} // namespace nnfw
-
-#define TFLITE_LOG(severity)                                \
-  nnfw::logging::LoggingWrapper(                            \
-      nnfw::logging::LoggingWrapper::LogSeverity::severity) \
-      .Stream()
-
-#define TFLITE_BENCHMARK_CHECK(condition)                 \
-  nnfw::logging::LoggingWrapper(                          \
-      nnfw::logging::LoggingWrapper::LogSeverity::FATAL,  \
-      (condition) ? false : true)                         \
-      .Stream()
-
-#define TFLITE_BENCHMARK_CHECK_EQ(a, b) TFLITE_BENCHMARK_CHECK(a == b)
-
-#endif // __TFLITE_BENCHMARK_MODEL_BENCHMARK_LOGGING_H_
diff --git a/tools/tflite_benchmark_model/profile_summarizer.cc b/tools/tflite_benchmark_model/profile_summarizer.cc
index 4d12b50..ce19b0c 100644
--- a/tools/tflite_benchmark_model/profile_summarizer.cc
+++ b/tools/tflite_benchmark_model/profile_summarizer.cc
@@ -39,8 +39,6 @@ namespace tflite {
 namespace profiling {
 namespace {
 
-using Detail = tensorflow::StatsCalculator::Detail;
-
 struct OperatorDetails {
   std::string name;
   std::vector<std::string> inputs;
   std::vector<std::string> outputs;
 };
@@ -94,18 +92,30 @@ OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
   } else {
     op_name = tflite::EnumNamesBuiltinOperator()[code];
   }
+  const char* profiling_string =
+      interpreter.OpProfilingString(node_reg->second, &node_reg->first);
   OperatorDetails details;
   details.name = op_name;
+  if (profiling_string) {
+    details.name += ":" + std::string(profiling_string);
+  }
   details.inputs = GetTensorNames(interpreter, inputs);
   details.outputs = GetTensorNames(interpreter, outputs);
   return details;
 }
 
+tensorflow::StatSummarizerOptions GetProfileSummarizerOptions() {
+  auto options = tensorflow::StatSummarizerOptions();
+  options.show_summary = true;
+  options.show_memory = false;
+  return options;
+}
+
 }  // namespace
 
 ProfileSummarizer::ProfileSummarizer()
-    : stats_calculator_(new ::tensorflow::StatsCalculator(
-          tensorflow::StatSummarizerOptions())) {}
+    : stats_calculator_(
+          new ::tensorflow::StatsCalculator(GetProfileSummarizerOptions())) {}
 
 void ProfileSummarizer::ProcessProfiles(
     const std::vector<const ProfileEvent*>& profile_stats,
@@ -129,35 +139,22 @@ void ProfileSummarizer::ProcessProfiles(
   int64_t base_start_us = events[0]->begin_timestamp_us;
   int node_num = 0;
   int64_t curr_total_us = 0;
-  std::map<std::string, Detail> details;
   int prev_op_idx = -1;
-  int seq_no = 1;
+  int child_op_no = 1;
   for (auto event : events) {
     auto op_details = GetOperatorDetails(interpreter, event->event_metadata);
-    bool is_continued = (prev_op_idx == event->event_metadata);
-    seq_no = is_continued ? seq_no + 1 : 1;
-    auto node_name = ToString(op_details.outputs) + "#" + std::to_string(seq_no);
-    auto result = details.emplace(node_name, Detail());
-    Detail* detail = &(result.first->second);
-    detail->start_us.UpdateStat(event->begin_timestamp_us - base_start_us);
+    bool from_same_op = (prev_op_idx == event->event_metadata);
+    child_op_no = from_same_op ? child_op_no + 1 : 1;
+    auto node_name = ToString(op_details.outputs) + "#" + std::to_string(child_op_no);
+    int64_t start_us = event->begin_timestamp_us - base_start_us;
     int64_t node_exec_time =
         event->end_timestamp_us - event->begin_timestamp_us;
-    detail->rel_end_us.UpdateStat(node_exec_time);
+    stats_calculator_->AddNodeStats(node_name, op_details.name, node_num,
+                                    start_us, node_exec_time, 0 /*memory */);
     curr_total_us += node_exec_time;
     ++node_num;
-
-    if (result.second) {
-      detail->name = node_name;
-      detail->type = op_details.name;
-      detail->run_order = node_num;
-      detail->times_called = 0;
-    }
-    if (!is_continued) {
-      ++detail->times_called;
-    }
     prev_op_idx = event->event_metadata;
   }
-  stats_calculator_->UpdateDetails(details);
   stats_calculator_->UpdateRunTotalUs(curr_total_us);
 }
 }  // namespace profiling
diff --git a/tools/tflite_benchmark_model/profile_summarizer.h b/tools/tflite_benchmark_model/profile_summarizer.h
deleted file mode 100644
index a529ff8..0000000
--- a/tools/tflite_benchmark_model/profile_summarizer.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
-#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
-
-#include <vector>
-
-#include "tensorflow/contrib/lite/interpreter.h"
-#include "tensorflow/contrib/lite/profiling/profiler.h"
-#include "tensorflow/core/util/stats_calculator.h"
-
-namespace tflite {
-namespace profiling {
-
-// Creates a summary of operator invocations in the interpreter.
-class ProfileSummarizer {
- public:
-  ProfileSummarizer();
-  virtual ~ProfileSummarizer() {}
-
-  // Process profile events to update statistics for operator invocations.
-  void ProcessProfiles(const std::vector<const ProfileEvent*>& profile_stats,
-                       const tflite::Interpreter& interpreter);
-
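The `ProcessProfiles` rewrite in the .cc hunk above is the heart of this commit's multiple-kernel-expansion support: consecutive profile events carrying the same `event_metadata` (op index) are treated as child kernels of one operator, and each is registered under a distinct `<outputs>#<child_op_no>` node name via `AddNodeStats` instead of being merged into a single map entry. A standalone sketch of just that numbering rule; the event stream and tensor names are hypothetical:

```cpp
#include <iostream>
#include <string>
#include <vector>

int main() {
  struct Event { int op_idx; std::string outputs; };
  // One op (index 5) that expanded into three kernels, then a one-kernel op.
  std::vector<Event> events = {
      {5, "[conv1]"}, {5, "[conv1]"}, {5, "[conv1]"}, {6, "[relu1]"}};
  int prev_op_idx = -1;
  int child_op_no = 1;
  for (const Event& e : events) {
    bool from_same_op = (prev_op_idx == e.op_idx);
    child_op_no = from_same_op ? child_op_no + 1 : 1;
    std::cout << e.outputs << "#" << child_op_no << "\n";
    prev_op_idx = e.op_idx;
  }
  // Prints [conv1]#1, [conv1]#2, [conv1]#3, [relu1]#1 (one per line), so each
  // expanded kernel becomes its own row in the final stats report.
  return 0;
}
```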
-  std::string GetOutputString() const {
-    return stats_calculator_->GetOutputString();
-  }
-
-  std::string GetShortSummary() const {
-    return stats_calculator_->GetShortSummary();
-  }
-
- private:
-  std::unique_ptr<tensorflow::StatsCalculator> stats_calculator_;
-};
-
-} // namespace profiling
-} // namespace tflite
-
-#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_
diff --git a/tools/tflite_benchmark_model/stats_calculator.cc b/tools/tflite_benchmark_model/stats_calculator.cc
new file mode 100644
index 0000000..5786507
--- /dev/null
+++ b/tools/tflite_benchmark_model/stats_calculator.cc
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/util/stats_calculator.h"
+
+#include <algorithm>
+#include <iomanip>
+#include <map>
+#include <queue>
+#include <sstream>
+#include <string>
+
+namespace tensorflow {
+
+StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
+    : options_(options) {}
+
+std::string StatsCalculator::GetShortSummary() const {
+  std::stringstream stream;
+  stream << "Timings (microseconds): ";
+  run_total_us_.OutputToStream(&stream);
+  stream << std::endl;
+
+  stream << "Memory (bytes): ";
+  memory_.OutputToStream(&stream);
+  stream << std::endl;
+
+  stream << details_.size() << " nodes observed" << std::endl;
+  return stream.str();
+}
+
+std::ostream& InitField(std::ostream& stream, int width) {
+  stream << "\t" << std::right << std::setw(width) << std::fixed
+         << std::setprecision(3);
+  return stream;
+}
+
+std::string StatsCalculator::HeaderString(const std::string& title) const {
+  std::stringstream stream;
+
+  stream << "============================== " << title
+         << " ==============================" << std::endl;
+
+  InitField(stream, 24) << "[node type]";
+  InitField(stream, 9) << "[start]";
+  InitField(stream, 9) << "[first]";
+  InitField(stream, 9) << "[avg ms]";
+  InitField(stream, 8) << "[%]";
+  InitField(stream, 8) << "[cdf%]";
+  InitField(stream, 10) << "[mem KB]";
+  InitField(stream, 9) << "[times called]";
+  stream << "\t"
+         << "[Name]";
+  return stream.str();
+}
+
+std::string StatsCalculator::ColumnString(const Detail& detail,
+                                          const int64_t cumulative_stat_on_node,
+                                          const Stat<int64_t>& stat) const {
+  const double start_ms = detail.start_us.avg() / 1000.0;
+  const double first_time_ms = detail.rel_end_us.first() / 1000.0;
+  const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
+  const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
+  const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
+  const int64_t times_called = detail.times_called / num_runs();
+
+  std::stringstream stream;
+  InitField(stream, 24) << detail.type;
+  InitField(stream, 9) << start_ms;
+  InitField(stream, 9) << first_time_ms;
+  InitField(stream, 9) << avg_time_ms;
+  InitField(stream, 7) << percentage << "%";
+  InitField(stream, 7) << cdf_percentage << "%";
+  InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
+  InitField(stream, 9) << times_called;
+  stream << "\t" << detail.name;
+
+  return stream.str();
+}
+
+void StatsCalculator::OrderNodesByMetric(
+    SortingMetric metric, std::vector<const Detail*>* details) const {
+  std::priority_queue<std::pair<std::string, const Detail*>> sorted_list;
+  const int num_nodes = details_.size();
+
+  for (const auto& det : details_) {
+    const Detail* detail = &(det.second);
+    std::stringstream stream;
+    stream << std::setw(20) << std::right << std::setprecision(10)
+           << std::fixed;
+
+    switch (metric) {
+      case BY_NAME:
+        stream << detail->name;
+        break;
+      case BY_RUN_ORDER:
+        stream << num_nodes - detail->run_order;
+        break;
+      case BY_TIME:
+        stream << detail->rel_end_us.avg();
+        break;
+      case BY_MEMORY:
+        stream << detail->mem_used.avg();
+        break;
+      case BY_TYPE:
+        stream << detail->type;
+        break;
+      default:
+        stream << "";
+        break;
+    }
+
+    sorted_list.emplace(stream.str(), detail);
+  }
+
+  while (!sorted_list.empty()) {
+    auto entry = sorted_list.top();
+    sorted_list.pop();
+    details->push_back(entry.second);
+  }
+}
+
+void StatsCalculator::ComputeStatsByType(
+    std::map<std::string, int64_t>* node_type_map_count,
+    std::map<std::string, int64_t>* node_type_map_time,
+    std::map<std::string, int64_t>* node_type_map_memory,
+    std::map<std::string, int64_t>* node_type_map_times_called,
+    int64_t* accumulated_us) const {
+  int64_t run_count = run_total_us_.count();
+
+  for (const auto& det : details_) {
+    const std::string node_name = det.first;
+    const Detail& detail = det.second;
+
+    int64_t curr_time_val =
+        static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
+    *accumulated_us += curr_time_val;
+
+    int64_t curr_memory_val = detail.mem_used.newest();
+
+    const std::string& node_type = detail.type;
+
+    const std::string sharp1("#1");
+    bool first = std::mismatch(sharp1.rbegin(), sharp1.rend(),
+                               node_name.rbegin()).first == sharp1.rend();
+
+    if (first) {
+      (*node_type_map_count)[node_type] += 1;
+      (*node_type_map_times_called)[node_type] +=
+          detail.times_called / run_count;
+    }
+    (*node_type_map_time)[node_type] += curr_time_val;
+    (*node_type_map_memory)[node_type] += curr_memory_val;
+  }
+}
+
+std::string StatsCalculator::GetStatsByNodeType() const {
+  std::stringstream stream;
+
+  stream << "Number of nodes executed: " << details_.size() << std::endl;
+
+  stream << "============================== Summary by node type "
+            "=============================="
+         << std::endl;
+
+  std::map<std::string, int64_t> node_type_map_count;
+  std::map<std::string, int64_t> node_type_map_time;
+  std::map<std::string, int64_t> node_type_map_memory;
+  std::map<std::string, int64_t> node_type_map_times_called;
+  int64_t accumulated_us = 0;
+
+  ComputeStatsByType(&node_type_map_count, &node_type_map_time,
+                     &node_type_map_memory, &node_type_map_times_called,
+                     &accumulated_us);
+
+  // Sort them.
+  std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
+      timings;
+  for (const auto& node_type : node_type_map_time) {
+    const int64_t mem_used = node_type_map_memory[node_type.first];
+    timings.emplace(node_type.second,
+                    std::pair<std::string, int64_t>(node_type.first, mem_used));
+  }
+
+  InitField(stream, 24) << "[Node type]";
+  InitField(stream, 9) << "[count]";
+  InitField(stream, 10) << "[avg ms]";
+  InitField(stream, 11) << "[avg %]";
+  InitField(stream, 11) << "[cdf %]";
+  InitField(stream, 10) << "[mem KB]";
+  InitField(stream, 10) << "[times called]";
+  stream << std::endl;
+
+  float cdf = 0.0f;
+  while (!timings.empty()) {
+    auto entry = timings.top();
+    timings.pop();
+
+    const std::string node_type = entry.second.first;
+    const float memory = entry.second.second / 1000.0f;
+
+    const int64_t node_type_total_us = entry.first;
+    const float time_per_run_ms = node_type_total_us / 1000.0f;
+
+    const float percentage =
+        ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
+    cdf += percentage;
+
+    InitField(stream, 24) << node_type;
+    InitField(stream, 9) << node_type_map_count[node_type];
+    InitField(stream, 10) << time_per_run_ms;
+    InitField(stream, 10) << percentage << "%";
+    InitField(stream, 10) << cdf << "%";
+    InitField(stream, 10) << memory;
+    InitField(stream, 9) << node_type_map_times_called[node_type];
+    stream << std::endl;
+  }
+  stream << std::endl;
+  return stream.str();
+}
+
+std::string StatsCalculator::GetStatsByMetric(const std::string& title,
+                                              SortingMetric sorting_metric,
+                                              int num_stats) const {
+  std::vector<const Detail*> details;
+  OrderNodesByMetric(sorting_metric, &details);
+
+  double cumulative_stat_on_node = 0;
+
+  std::stringstream stream;
+  stream << HeaderString(title) << std::endl;
+  int stat_num = 0;
+  for (auto detail : details) {
+    ++stat_num;
+    if (num_stats > 0 && stat_num > num_stats) {
+      break;
+    }
+
+    // TODO(andrewharp): Make this keep track of the particular metric for cdf.
+    cumulative_stat_on_node += detail->rel_end_us.sum();
+    stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
+           << std::endl;
+  }
+  stream << std::endl;
+  return stream.str();
+}
+
+std::string StatsCalculator::GetOutputString() const {
+  std::stringstream stream;
+  if (options_.show_run_order) {
+    stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
+                               options_.run_order_limit);
+  }
+  if (options_.show_time) {
+    stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
+                               options_.time_limit);
+  }
+  if (options_.show_memory) {
+    stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
+                               options_.memory_limit);
+  }
+  if (options_.show_type) {
+    stream << GetStatsByNodeType();
+  }
+  if (options_.show_summary) {
+    stream << GetShortSummary() << std::endl;
+  }
+  return stream.str();
+}
+
+void StatsCalculator::AddNodeStats(const std::string& name,
+                                   const std::string& type, int64_t run_order,
+                                   int64_t start_us, int64_t rel_end_us,
+                                   int64_t mem_used) {
+  Detail* detail = nullptr;
+  if (details_.find(name) == details_.end()) {
+    details_.insert({name, {}});
+    detail = &details_.at(name);
+    detail->type = type;
+    detail->name = name;
+    detail->run_order = run_order;
+  } else {
+    detail = &details_.at(name);
+  }
+  detail->start_us.UpdateStat(start_us);
+  detail->rel_end_us.UpdateStat(rel_end_us);
+  detail->mem_used.UpdateStat(mem_used);
+  detail->times_called++;
+}
+
+} // namespace tensorflow
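
Note for reviewers: the heart of this change is that a single TFLite node may now expand into several kernels, so ProcessProfiles() names each event from the same op "<outputs>#<child_no>" and ComputeStatsByType() counts an operator once by testing for the "#1" suffix. Below is a minimal standalone C++ sketch of just that naming and counting logic; FakeEvent and its fields are hypothetical stand-ins for the real ProfileEvent, used only for illustration and not part of the patch.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for tflite::profiling::ProfileEvent.
struct FakeEvent {
  int op_index;        // stands in for event->event_metadata
  std::string output;  // stands in for ToString(op_details.outputs)
};

int main() {
  // Two kernels expanded from op 0, then one kernel from op 1.
  std::vector<FakeEvent> events = {
      {0, "conv_out"}, {0, "conv_out"}, {1, "pool_out"}};

  int prev_op_idx = -1;
  int child_op_no = 1;
  for (const auto& event : events) {
    // Same child-numbering scheme as the modified ProcessProfiles().
    bool from_same_op = (prev_op_idx == event.op_index);
    child_op_no = from_same_op ? child_op_no + 1 : 1;
    std::string node_name = event.output + "#" + std::to_string(child_op_no);

    // Same reversed-mismatch test as ComputeStatsByType(): true only for
    // names ending in "#1", i.e. the first kernel of an expanded op.
    const std::string sharp1("#1");
    bool first = std::mismatch(sharp1.rbegin(), sharp1.rend(),
                               node_name.rbegin()).first == sharp1.rend();
    std::cout << node_name << (first ? "  (counted as a new op)" : "") << "\n";
    prev_op_idx = event.op_index;
  }
  return 0;
}

// Expected output:
// conv_out#1  (counted as a new op)
// conv_out#2
// pool_out#1  (counted as a new op)

The reversed std::mismatch is just a cheap ends-with("#1") check: only the first kernel of an expanded operator bumps the per-type node count, while every kernel still contributes its execution time to that type's total.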