From ae2cac294aa8228c68523499e4690ae274169695 Mon Sep 17 00:00:00 2001 From: Parichay Kapoor Date: Tue, 18 Aug 2020 11:27:56 +0900 Subject: [PATCH] [ext/nnfw] Check availability of neon at runtime This patch adds a runtime check for the availability of NEON SIMD instructions on the machine. If NEON is not found, the auto accelerator of the NEON backend is switched to basic CPU. However, NEON is still in the supported list of backends, and this does not prevent the user from selecting the NEON backend even though it's not available. **Self evaluation:** 1. Build test: [x]Passed [ ]Failed [ ]Skipped 2. Run test: [x]Passed [ ]Failed [ ]Skipped Signed-off-by: Parichay Kapoor --- .../tensor_filter/tensor_filter_common.c | 134 ++++++++++++++++++++- tests/nnstreamer_filter_tensorflow_lite/runTest.sh | 2 +- 2 files changed, 130 insertions(+), 6 deletions(-) diff --git a/gst/nnstreamer/tensor_filter/tensor_filter_common.c b/gst/nnstreamer/tensor_filter/tensor_filter_common.c index aeafcf1..c86b83a 100644 --- a/gst/nnstreamer/tensor_filter/tensor_filter_common.c +++ b/gst/nnstreamer/tensor_filter/tensor_filter_common.c @@ -24,6 +24,14 @@ */ #include +#if defined(__aarch64__) || defined(__arm__) +#include +#if defined(__ANDROID__) +#include +#else /* __ANDROID__ */ +#include +#endif /* __ANDROID__ */ +#endif /* __aarch64__ || __arm__ */ #include #include @@ -2056,9 +2064,64 @@ add_basic_supported_accelerators (const gchar ** supported_accelerators) } /** + * @brief Filter accelerators based on the runtime system + * @note returned array must be freed by the caller + * @details This filters out the NEON accelerator if the system running the + * tensor_filter does not support NEON instructions + */ +static const gchar ** +filter_supported_accelerators (const gchar ** supported_accelerators) +{ + gint num_hw = 0, idx = 0; + const gchar **accl_support; + gboolean neon_support = TRUE; + +#if defined(__aarch64__) || defined(__arm__) + glong hwcap_flag; + +#if defined(__aarch64__) +
hwcap_flag = HWCAP_ASIMD; +#elif defined(__arm__) + hwcap_flag = HWCAP_NEON; +#endif + + if (getauxval (AT_HWCAP) & hwcap_flag) { + neon_support = TRUE; + } else { + neon_support = FALSE; + } +#endif + + /** Count number of elements for the array */ + while (supported_accelerators[num_hw] != NULL) { + num_hw += 1; + } + + /** Allocate the array */ + accl_support = g_malloc (sizeof (gchar *) * (num_hw + 1)); + + /** Fill the array */ + idx = 0; + num_hw = 0; + while (supported_accelerators[idx] != NULL) { + if (get_accl_hw_type (supported_accelerators[idx]) == ACCL_CPU_NEON && + !neon_support) { + g_critical ("Neon instructions are not available on this device."); + } else { + accl_support[num_hw] = supported_accelerators[idx]; + num_hw += 1; + } + idx += 1; + } + accl_support[num_hw] = NULL; + + return accl_support; +} + +/** * @brief parse user given string to extract accelerator based on given regex * @param[in] accelerators user given input - * @param[in] supported_accelerators list ofi supported accelerators + * @param[in] supported_accelerators list of supported accelerators * @param[in] auto_accelerator accelerator to use in auto case (when acceleration is enabled but specific accelerator is not provided or not matching) * @param[in] default_accelerator accelerator to use by default * @return Corresponding accelerator. Returns ACCL_NONE if not found. 
@@ -2101,6 +2164,36 @@ parse_accl_hw_util (const gchar * accelerators, } /** + * @brief Check if this accelerator can be used based on the runtime system + * @retval 0 if the accelerator can be used, -ENOENT otherwise + */ +static gint +runtime_check_supported_accelerator (const gchar * accl) +{ + const gchar **accl_support, **filtered_accl_support; + gint ret = 0; + + /** Allocate the array */ + accl_support = g_malloc (sizeof (gchar *) * (2)); + + /** Fill the array */ + accl_support[0] = accl; + accl_support[1] = NULL; + + filtered_accl_support = filter_supported_accelerators (accl_support); + if (!filtered_accl_support || filtered_accl_support[0] == NULL) { + ret = -ENOENT; + } else { + ret = 0; + } + + g_free (filtered_accl_support); + g_free (accl_support); + + return ret; +} + +/** * @brief parse user given string to extract accelerator based on given regex filling in optional arguments */ accl_hw @@ -2109,14 +2202,45 @@ parse_accl_hw_fill (parse_accl_args accl_args) const gchar *in_accl = accl_args.in_accl; const gchar **sup_accl = accl_args.sup_accl; const gchar *def_accl, *auto_accl; + const gchar **filtered_accl; + accl_hw ret = ACCL_NONE; if (accl_args.sup_accl == NULL || accl_args.sup_accl[0] == NULL) - return ACCL_NONE; + return ret; - def_accl = accl_args.def_accl ? accl_args.def_accl : accl_args.sup_accl[0]; - auto_accl = accl_args.auto_accl ? 
accl_args.auto_accl : accl_args.sup_accl[0]; + /** remove unsupported accelerators from this list based on runtime system */ + filtered_accl = filter_supported_accelerators (accl_args.sup_accl); + if (!filtered_accl) { + return ret; + } + + /** filtered supported accelerators can be empty */ + sup_accl = filtered_accl; + if (sup_accl[0] == NULL) { + g_free (filtered_accl); + return ret; + } - return parse_accl_hw_util (in_accl, sup_accl, auto_accl, def_accl); + /** update default accelerator if it is not available at runtime */ + if (accl_args.def_accl && + runtime_check_supported_accelerator (accl_args.def_accl) == 0) { + def_accl = accl_args.def_accl; + } else { + def_accl = sup_accl[0]; + } + + /** update auto accelerator if it is not available at runtime */ + if (accl_args.auto_accl && + runtime_check_supported_accelerator (accl_args.auto_accl) == 0) { + auto_accl = accl_args.auto_accl; + } else { + auto_accl = sup_accl[0]; + } + + ret = parse_accl_hw_util (in_accl, sup_accl, auto_accl, def_accl); + g_free (filtered_accl); + + return ret; } /** diff --git a/tests/nnstreamer_filter_tensorflow_lite/runTest.sh b/tests/nnstreamer_filter_tensorflow_lite/runTest.sh index 7b77734..47cff44 100644 --- a/tests/nnstreamer_filter_tensorflow_lite/runTest.sh +++ b/tests/nnstreamer_filter_tensorflow_lite/runTest.sh @@ -96,7 +96,7 @@ function run_pipeline() { } arch=$(uname -m) -if [ "$arch" = "aarch64" ] || [ "$arch" = "armv7l" ]; then +if [ "$arch" = "armv7l" ]; then auto_accl="cpu.neon" elif [ "$arch" = "x86_64" ]; then auto_accl="cpu.simd" -- 2.7.4