From 8905be4ab9d7b5e1415b06ac63a854e6a22ba444 Mon Sep 17 00:00:00 2001
From: Dongju Chae
Date: Thu, 4 Jul 2019 17:20:19 +0900
Subject: [PATCH] [N44] Add prototype impl. of N44

This commit includes the basic implementation for N44.

Signed-off-by: Dongju Chae
---
 core/npu-engine/src/ne-armplugin.c    | 648 ++++++++++++++++++++++++++++++++++
 core/npu-engine/src/ne-inputservice.h |   2 -
 2 files changed, 648 insertions(+), 2 deletions(-)
 create mode 100644 core/npu-engine/src/ne-armplugin.c

diff --git a/core/npu-engine/src/ne-armplugin.c b/core/npu-engine/src/ne-armplugin.c
new file mode 100644
index 0000000..3db02ec
--- /dev/null
+++ b/core/npu-engine/src/ne-armplugin.c
@@ -0,0 +1,648 @@
+/**
+ * Proprietary
+ * Copyright (C) 2019 Samsung Electronics
+ * Copyright (C) 2019 Dongju Chae
+ * Copyright (C) 2019 MyungJoo Ham
+ */
+/**
+ * @file NE-armplugin.c
+ * @date 3 Jul 2019
+ * @brief API to create ARM post processing binaries.
+ * @see http://suprem.sec.samsung.net/confluence/display/ODLC/Software+Stack
+ * @author Dongju Chae
+ * @author MyungJoo Ham
+ * @bug No known bugs except for NYI items
+ */
+
+#include <pthread.h>
+
+#include "ne-inputservice.h"
+#include "ne-inf.h"
+#include "ne-utils.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#define TAG _N44
+
+/** @brief get the private data attached to an inputservice */
+#define GET_PRIVATE(inputservice) \
+  ((inputservice_priv *) (inputservice)->pdata)
+
+/** @brief constructor prototype for initialization */
+void init_npu_arm_plugin_inputservice (void) __attribute__ ((constructor));
+
+/** @brief destructor prototype for termination */
+void fini_npu_arm_plugin_inputservice (void) __attribute__ ((destructor));
+
+/** @brief thread state (STOP and PREEMPT are the stop modes set by halt) */
+typedef enum {
+  THREAD_STATE_NONE,
+  THREAD_STATE_RUN,
+  THREAD_STATE_STOP,
+  THREAD_STATE_PREEMPT,
+} thread_state;
+
+/** @brief the structure for the private data of arm plugin */
+typedef struct {
+  npu_arm_plugin *plugin;
+  void *p_data;
+  list_node list;
+} npu_arm_plugin_priv;
+
+/** @brief the argument for callbacks */
+typedef struct {
+  n40_data *data;
+  uint64_t offset;
+  uint64_t size;
+} callback_arg;
+
+/** @brief the structure for the private data of arm plugin inputservice */
+typedef struct {
+  inputservice *inputservice;
+
+  const submodel *sm;
+  n40_data *data;
+
+  pthread_mutex_t *mutex;
+  pthread_cond_t *cond;
+
+  struct {
+    pthread_t t;
+    pthread_mutex_t mutex;
+    pthread_cond_t cond;
+    thread_state state;
+    int data_ready;
+  } thread;
+
+  struct {
+    pthread_t t;
+    pthread_mutex_t mutex;
+    pthread_cond_t cond;
+    thread_state state;
+    callback_arg arg;
+    int cb_ready;
+  } cb_thread;
+
+  list plugin_list;
+} inputservice_priv;
+
+/**
+ * @brief find the private data for arm plugin associated to model
+ * @param[in] priv the private data of the inputservice
+ * @return the registered entry whose model id and version match the model
+ *         of the configured submodel, or NULL if there is no such entry
+ */
+static npu_arm_plugin_priv *
+find_plugin_priv (inputservice_priv *priv)
+{
+  const npubin_meta *meta;
+  npu_arm_plugin_priv *plugin_priv;
+
+  /* no submodel has been configured yet */
+  if (priv->sm == NULL)
+    return NULL;
+
+  meta = priv->sm->parent->meta;
+
+  /*
+   * return the match from inside the loop. When the iteration completes
+   * without a match, the cursor is not NULL (assuming the usual kernel-style
+   * list_for_each_entry), so it must not be returned as a result.
+   */
+  list_for_each_entry (plugin_priv, priv->plugin_list, list) {
+    npu_arm_plugin *plugin = plugin_priv->plugin;
+
+    if (plugin->model_id == meta->model_id &&
+        plugin->model_version == meta->model_version)
+      return plugin_priv;
+  }
+
+  return NULL;
+}
+
+/**
+ * @brief callback thread
+ * @param[in] arg the private data of the inputservice
+ * @return always NULL
+ * @note it delivers each posted output to the user callback until the
+ *       state of the callback thread leaves THREAD_STATE_RUN
+ */
+static void *
+run_callback (void * arg)
+{
+  inputservice_priv *priv = arg;
+  callback_arg *cb_arg = &priv->cb_thread.arg;
+
+  pthread_mutex_lock (&priv->cb_thread.mutex);
+  while (priv->cb_thread.state == THREAD_STATE_RUN) {
+    n40_data *data;
+    uint64_t offset;
+    uint64_t size;
+
+    /* also watch the state; a stop request arrives with no data posted */
+    while (cb_arg->data == NULL &&
+        priv->cb_thread.state == THREAD_STATE_RUN)
+      pthread_cond_wait (&priv->cb_thread.cond, &priv->cb_thread.mutex);
+
+    if (priv->cb_thread.state != THREAD_STATE_RUN)
+      break;
+
+    data = cb_arg->data;
+    offset = cb_arg->offset;
+    size = cb_arg->size;
+
+    cb_arg->data = NULL;
+    pthread_mutex_unlock (&priv->cb_thread.mutex);
+
+    /* the user callback runs without the lock so new outputs can be posted */
+    data->cb (data->inbuf, offset, size, data->cb_data);
+
+    pthread_mutex_lock (&priv->cb_thread.mutex);
+  }
+  pthread_mutex_unlock (&priv->cb_thread.mutex);
+
+  return NULL;
+}
+
+/**
+ * @brief plugin thread to perform post-processing in .so file.
+ * @param[in] arg the private data of the inputservice
+ * @return always NULL
+ */
+static void *
+run_plugin (void * arg)
+{
+  inputservice_priv *priv = arg;
+
+  const submodel *sm = priv->sm;
+  const npubin_submeta *submeta = sm->meta;
+
+  const model *m = sm->parent;
+  const npubin_meta *meta = m->meta;
+
+  npu_arm_plugin_priv *plugin_priv;
+  npu_arm_plugin *plugin;
+  int notify_stop;
+
+  pthread_mutex_lock (priv->mutex);
+
+  plugin_priv = find_plugin_priv (priv);
+
+  pthread_mutex_unlock (priv->mutex);
+
+  if (plugin_priv == NULL) {
+    logerr (TAG, "Cannot find the target plugin (%lu,%lu)\n",
+        meta->model_id, meta->model_version);
+
+    /* go back to idle and wake up anyone waiting in halt */
+    pthread_mutex_lock (&priv->thread.mutex);
+    priv->thread.state = THREAD_STATE_NONE;
+    pthread_cond_broadcast (&priv->thread.cond);
+    pthread_mutex_unlock (&priv->thread.mutex);
+    return NULL;
+  }
+
+  plugin = plugin_priv->plugin;
+  assert (plugin);
+
+  pthread_mutex_lock (&priv->thread.mutex);
+  while (priv->thread.state == THREAD_STATE_RUN) {
+    n40_data *data;
+    hwmem *mem;
+    char *buf;
+
+    /* it's woken up whenever new data is ready */
+    while (!priv->thread.data_ready) {
+      pthread_cond_wait (&priv->thread.cond, &priv->thread.mutex);
+      if (priv->thread.state > THREAD_STATE_RUN)
+        goto out;
+    }
+    priv->thread.data_ready = 0;
+
+    /* keep the pointer; priv->data may be replaced while unlocked */
+    data = priv->data;
+    assert (data);
+
+    if (buffer_get_hwmem (data->inbuf, &mem) < 0) {
+      logerr (TAG, "Fail to get hwmem for inbuf\n");
+      goto out;
+    }
+
+    if (hwmem_get_data (mem, (void **) &buf) < 0) {
+      logerr (TAG, "Fail to get data for hwmem\n");
+      goto out;
+    }
+
+    /* this lock can be unlocked because we have the associated buffer */
+    pthread_mutex_unlock (&priv->thread.mutex);
+
+    plugin->process (buf + submeta->input_offset, submeta->input_size,
+        buf + submeta->output_offset, submeta->output_size,
+        plugin_priv->p_data, m->inputSequence);
+
+    /* re-take the lock before looking at the state again */
+    pthread_mutex_lock (&priv->thread.mutex);
+
+    /* in case of preempt, output data is invalid. so, we don't need to launch callback */
+    if (priv->thread.state != THREAD_STATE_PREEMPT) {
+      pthread_mutex_lock (&priv->cb_thread.mutex);
+
+      if (priv->cb_thread.state == THREAD_STATE_NONE) {
+        priv->cb_thread.state = THREAD_STATE_RUN;
+        priv->cb_thread.arg.data = NULL;
+
+        if (pthread_create (&priv->cb_thread.t, NULL, run_callback, (void*) priv) != 0 ||
+            pthread_detach (priv->cb_thread.t) != 0) {
+          logerr (TAG, "Fail to create callback thread\n");
+          priv->cb_thread.state = THREAD_STATE_NONE;
+        }
+      }
+
+      if (priv->cb_thread.state == THREAD_STATE_RUN) {
+        priv->cb_thread.arg.data = data;
+        priv->cb_thread.arg.offset = submeta->output_offset;
+        priv->cb_thread.arg.size = submeta->output_size;
+        pthread_cond_broadcast (&priv->cb_thread.cond);
+      }
+
+      pthread_mutex_unlock (&priv->cb_thread.mutex);
+    }
+  }
+
+out:
+  /* stop callback thread */
+  pthread_mutex_lock (&priv->cb_thread.mutex);
+  if (priv->cb_thread.state != THREAD_STATE_NONE) {
+    priv->cb_thread.state = THREAD_STATE_NONE;
+    pthread_cond_broadcast (&priv->cb_thread.cond);
+  }
+  pthread_mutex_unlock (&priv->cb_thread.mutex);
+
+  notify_stop = (priv->thread.state == THREAD_STATE_STOP);
+
+  /* become idle and wake up arm_plugin_halt () waiting for the termination */
+  priv->thread.state = THREAD_STATE_NONE;
+  pthread_cond_broadcast (&priv->thread.cond);
+  pthread_mutex_unlock (&priv->thread.mutex);
+
+  /*
+   * broadcast to N4C's output callback wrapper. It's done after releasing
+   * thread.mutex; the entry points take thread.mutex while the lock of this
+   * input service is assumed to be held, so the reverse order is avoided.
+   */
+  if (notify_stop) {
+    pthread_mutex_lock (priv->mutex);
+    pthread_cond_broadcast (priv->cond);
+    pthread_mutex_unlock (priv->mutex);
+  }
+
+  return NULL;
+}
+
+/**
+ * @brief get status of the input service
+ * @param[in] me the inputservice instance
+ * @return N40_IDLE if the plugin thread state is NONE. otherwise, N40_BUSY
+ * @note it's assumed that N4C has already acquired the lock of this input service
+ */
+static n40_status
+arm_plugin_get_status (inputservice *me)
+{
+  inputservice_priv *priv = GET_PRIVATE (me);
+  n40_status status;
+
+  assert (priv);
+
+  pthread_mutex_lock (&priv->thread.mutex);
+
+  status = (priv->thread.state == THREAD_STATE_NONE ? N40_IDLE : N40_BUSY);
+
+  pthread_mutex_unlock (&priv->thread.mutex);
+
+  return status;
+}
+
+/**
+ * @brief configure the input service
+ * @param[in] me the inputservice instance
+ * @param[in] m the submodel to be processed by this input service
+ * @return 0 if no error. -EBUSY if a previous inference is not finished
+ * @note it's assumed that N4C has already acquired the lock of this input service
+ */
+static int
+arm_plugin_configure (inputservice *me, const submodel *m)
+{
+  inputservice_priv *priv = GET_PRIVATE (me);
+
+  assert (priv);
+
+  if (arm_plugin_get_status (me) != N40_IDLE) {
+    logerr (TAG, "Fail to configure. A previous inference is not yet finished\n");
+    return -EBUSY;
+  }
+
+  /* this plugin is supposed to perform some processing for this submodel */
+
+  priv->sm = m;
+  priv->data = NULL;
+
+  return 0;
+}
+
+/**
+ * @brief halt the input service
+ * @param[in] me the inputservice instance
+ * @param[in] mode HALT_NOW preempts the plugin and waits until its thread
+ *            is finished. Any other mode (HALT_NEXT) only requests a stop
+ * @return 0 if no error. -EINVAL if the target plugin is not found
+ * @note it's assumed that N4C has already acquired the lock of this input service
+ */
+static int
+arm_plugin_halt (inputservice *me, n40_haltmode mode)
+{
+  inputservice_priv *priv = GET_PRIVATE (me);
+
+  assert (priv);
+
+  pthread_mutex_lock (&priv->thread.mutex);
+
+  /* check whether it's finished; there is nothing to halt without a thread */
+  if (priv->thread.state == THREAD_STATE_NONE) {
+    pthread_mutex_unlock (&priv->thread.mutex);
+    return 0;
+  }
+
+  if (mode == HALT_NOW) {
+    /* preempt the current processing */
+    npu_arm_plugin_priv *plugin_priv = find_plugin_priv (priv);
+
+    if (!plugin_priv) {
+      pthread_mutex_unlock (&priv->thread.mutex);
+      return -EINVAL;
+    }
+
+    assert (plugin_priv->plugin);
+
+    priv->thread.state = THREAD_STATE_PREEMPT;
+    plugin_priv->plugin->preempt ();
+    /* wake up the thread in case that it's waiting for the next data */
+    pthread_cond_broadcast (&priv->thread.cond);
+    /* wait until the thread is finished (compare the state, not its address) */
+    while (priv->thread.state != THREAD_STATE_NONE)
+      pthread_cond_wait (&priv->thread.cond, &priv->thread.mutex);
+  } else { /* HALT_NEXT */
+    /* just notify the thread can be stopped after current processing is finished */
+    priv->thread.state = THREAD_STATE_STOP;
+    pthread_cond_broadcast (&priv->thread.cond);
+  }
+  pthread_mutex_unlock (&priv->thread.mutex);
+
+  return 0;
+}
+
+/**
+ * @brief start the input service
+ * @param[in] me the inputservice instance
+ * @return 0 if no error. -EINVAL if no submodel is configured, -EBUSY if a
+ *         previous inference is not finished, or the negated error number
+ *         returned by pthread_create ()/pthread_detach ()
+ * @note it's assumed that N4C has already acquired the lock of this input service
+ */
+static int
+arm_plugin_start (inputservice *me)
+{
+  inputservice_priv *priv = GET_PRIVATE (me);
+  int err;
+
+  assert (priv);
+
+  /* run_plugin () dereferences the submodel; it should be configured first */
+  if (priv->sm == NULL) {
+    logerr (TAG, "Fail to start. No submodel is configured\n");
+    return -EINVAL;
+  }
+
+  pthread_mutex_lock (&priv->thread.mutex);
+
+  if (priv->thread.state != THREAD_STATE_NONE) {
+    pthread_mutex_unlock (&priv->thread.mutex);
+    logerr (TAG, "Fail to start. A previous inference is not yet finished\n");
+    return -EBUSY;
+  }
+
+  priv->thread.state = THREAD_STATE_RUN;
+  priv->thread.data_ready = 0;
+
+  /* pthread functions return an error number; they don't set errno */
+  err = pthread_create (&priv->thread.t, NULL, run_plugin, (void *) priv);
+  if (err != 0) {
+    /* no thread exists; don't leave the service busy forever */
+    priv->thread.state = THREAD_STATE_NONE;
+    pthread_mutex_unlock (&priv->thread.mutex);
+    logerr (TAG, "Fail to create plugin thread (errno: %d)\n", err);
+    return -err;
+  }
+
+  pthread_mutex_unlock (&priv->thread.mutex);
+
+  /* don't release its resource using join */
+  err = pthread_detach (priv->thread.t);
+  if (err != 0) {
+    logerr (TAG, "Fail to detach plugin thread (errno: %d)\n", err);
+    return -err;
+  }
+
+  return 0;
+}
+
+/**
+ * @brief move to next input
+ * @param[in] me the inputservice instance
+ * @param[in] data the next data to be processed by the plugin thread
+ * @return always 0
+ * @note it's assumed that N4C has already acquired the lock of this input service
+ */
+static int
+arm_plugin_next (inputservice *me, n40_data *data)
+{
+  inputservice_priv *priv = GET_PRIVATE (me);
+
+  assert (priv);
+
+  pthread_mutex_lock (&priv->thread.mutex);
+
+  priv->data = data;
+  priv->thread.data_ready = 1;
+
+  pthread_cond_broadcast (&priv->thread.cond);
+  pthread_mutex_unlock (&priv->thread.mutex);
+
+  return 0;
+}
+
+/** @brief the instance for arm plugin inputservice */
+static inputservice arm_plugin_inputservice = {
+  .configure = arm_plugin_configure,
+  .halt = arm_plugin_halt,
+  .getStatus = arm_plugin_get_status,
+  .start = arm_plugin_start,
+  .next = arm_plugin_next,
+};
+
+/**
+ * @brief register the arm plugin
+ * @param[in] plugin the arm plugin to be registered
+ * @param[in] p_data the private data passed to process () of the plugin
+ * @return 0 if no error. -EINVAL if plugin is NULL, -ENODEV if this input
+ *         service has no private data, or -ENOMEM if out of memory
+ */
+int
+register_npu_arm_plugin (npu_arm_plugin *plugin, void *p_data)
+{
+  inputservice_priv *priv = GET_PRIVATE (&arm_plugin_inputservice);
+  npu_arm_plugin_priv *plugin_priv;
+
+  if (plugin == NULL)
+    return -EINVAL;
+
+  /* the private data is absent if the initialization failed or didn't run */
+  if (priv == NULL)
+    return -ENODEV;
+
+  plugin_priv = malloc (sizeof (*plugin_priv));
+  if (plugin_priv == NULL)
+    return -ENOMEM;
+
+  plugin_priv->plugin = plugin;
+  plugin_priv->p_data = p_data;
+
+  pthread_mutex_lock (priv->mutex);
+  list_add (&priv->plugin_list, &plugin_priv->list);
+  pthread_mutex_unlock (priv->mutex);
+
+  return 0;
+}
+
+/**
+ * @brief deregister the arm plugin
+ * @param[in] plugin the arm plugin to be deregistered
+ * @return 0 if no error. -ENOENT if the plugin is not registered
+ */
+int
+deregister_npu_arm_plugin (npu_arm_plugin *plugin)
+{
+  inputservice_priv *priv = GET_PRIVATE (&arm_plugin_inputservice);
+  npu_arm_plugin_priv *plugin_priv, *tmp;
+  int err = -ENOENT;
+
+  /* nothing is registered when there is no private data */
+  if (priv == NULL)
+    return err;
+
+  pthread_mutex_lock (priv->mutex);
+
+  list_for_each_entry_safe (plugin_priv, tmp, priv->plugin_list, list) {
+    if (plugin_priv->plugin == plugin) {
+      list_del (&priv->plugin_list, &plugin_priv->list);
+      free (plugin_priv);
+      err = 0;
+      break;
+    }
+  }
+
+  pthread_mutex_unlock (priv->mutex);
+
+  return err;
+}
+
+/**
+ * @brief Initialize arm plugin inputservice
+ * @note if it fails, no private data is attached to the inputservice
+ */
+void
+init_npu_arm_plugin_inputservice (void)
+{
+  inputservice_priv *priv;
+
+  /* zero-filled; sm, data and the callback argument start as NULL */
+  priv = calloc (1, sizeof (*priv));
+  if (priv == NULL)
+    return;
+
+  priv->inputservice = &arm_plugin_inputservice;
+  priv->mutex = &arm_plugin_inputservice.mutex;
+  priv->cond = &arm_plugin_inputservice.cond;
+
+  arm_plugin_inputservice.pdata = (void *) priv;
+
+  list_init (&priv->plugin_list);
+
+  pthread_mutex_init (priv->mutex, NULL);
+  pthread_cond_init (priv->cond, NULL);
+
+  pthread_mutex_init (&priv->thread.mutex, NULL);
+  pthread_cond_init (&priv->thread.cond, NULL);
+
+  pthread_mutex_init (&priv->cb_thread.mutex, NULL);
+  pthread_cond_init (&priv->cb_thread.cond, NULL);
+
+  priv->thread.state = THREAD_STATE_NONE;
+  priv->cb_thread.state = THREAD_STATE_NONE;
+
+  arm_plugin_inputservice.initialized = 1;
+
+  if (n40_register_input_service (N4_OPS_ARM, &arm_plugin_inputservice) < 0) {
+    pthread_mutex_destroy (priv->mutex);
+    pthread_cond_destroy (priv->cond);
+
+    pthread_mutex_destroy (&priv->thread.mutex);
+    pthread_cond_destroy (&priv->thread.cond);
+
+    pthread_mutex_destroy (&priv->cb_thread.mutex);
+    pthread_cond_destroy (&priv->cb_thread.cond);
+
+    /* don't leave a dangling pointer to the freed private data */
+    arm_plugin_inputservice.pdata = NULL;
+    free (priv);
+
+    arm_plugin_inputservice.initialized = 0;
+  }
+}
+
+/** @brief Terminate arm plugin inputservice */
+void
+fini_npu_arm_plugin_inputservice (void)
+{
+  if (arm_plugin_inputservice.initialized) {
+    inputservice_priv *priv = GET_PRIVATE (&arm_plugin_inputservice);
+    npu_arm_plugin_priv *plugin_priv, *tmp;
+
+    n40_unregister_input_service (N4_OPS_ARM);
+
+    /* release the entries of plugins which are still registered */
+    pthread_mutex_lock (priv->mutex);
+    list_for_each_entry_safe (plugin_priv, tmp, priv->plugin_list, list) {
+      list_del (&priv->plugin_list, &plugin_priv->list);
+      free (plugin_priv);
+    }
+    pthread_mutex_unlock (priv->mutex);
+
+    pthread_mutex_destroy (priv->mutex);
+    pthread_cond_destroy (priv->cond);
+
+    pthread_mutex_destroy (&priv->thread.mutex);
+    pthread_cond_destroy (&priv->thread.cond);
+
+    pthread_mutex_destroy (&priv->cb_thread.mutex);
+    pthread_cond_destroy (&priv->cb_thread.cond);
+
+    /* don't leave a dangling pointer to the freed private data */
+    arm_plugin_inputservice.pdata = NULL;
+    free (priv);
+
+    arm_plugin_inputservice.initialized = 0;
+  }
+}
diff --git a/core/npu-engine/src/ne-inputservice.h b/core/npu-engine/src/ne-inputservice.h
index a78ee4f..8e72e06 100644
--- a/core/npu-engine/src/ne-inputservice.h
+++ b/core/npu-engine/src/ne-inputservice.h
@@ -36,8 +36,6 @@ typedef enum {
   N40_ERROR = 0xffffffff
 } n40_status;
 
-struct _inputservice;
-struct _n40_data;
 typedef struct _inputservice inputservice;
 typedef struct _n40_data n40_data;
 
-- 
2.7.4