This patch adds support for the profiling ioctl() calls of the CUSE device.
Signed-off-by: Dongju Chae <dongju.chae@samsung.com>
ne_install_conf.set('RESV_MEM_SIZE', get_option('resv_mem_size'))
ne_install_conf.set('NUM_THREADS', get_option('num_threads'))
+ne_install_conf.set('PREFIX_SHARE', ne_prefix + get_option('prefix_share'))
+ne_install_conf.set('PREFIX_PROFILE', get_option('prefix_profile'))
ne_install_conf.set('LOG_DIR', get_option('log_dir'))
ne_install_conf.set('LOG_LEVEL', get_option('log_level'))
option('comm_opt', type : 'string', value : 'ip')
option('resv_mem_size', type : 'string', value : '0')
option('num_threads', type : 'string', value : '8')
+option('prefix_share', type : 'string', value : '/share')
+option('prefix_profile', type : 'string', value : '/tmp')
option('log_dir', type : 'string', value : '/tmp/')
option('log_level', type : 'string', value : '1')
option('enable_tizen', type : 'boolean', value : false)
[main]
num_threads=@NUM_THREADS@
resv_mem_size=@RESV_MEM_SIZE@
+prefix_share=@PREFIX_SHARE@
+prefix_profile=@PREFIX_PROFILE@
[debug]
log_dir=@LOG_DIR@
dev_type dev_type_; /**< emulated device type */
- char *prefix_share_; /**< prefix of share directory */
- char *prefix_profile_; /**< prefix of profile directory */
+ const char *prefix_share_; /**< prefix of share directory */
+ const char *prefix_profile_; /**< prefix of profile directory */
};
#endif
#include <ne-conf.h>
#define MAX_EMUL_DEVICES (3)
-#define ENV_PREFIX_SHARE "NE_PREFIX_SHARE"
-#define ENV_PREFIX_PROFILE "NE_PREFIX_PROFILE"
-#define DEFAULT_PREFIX_SHARE NE_PREFIX "/share"
-#define DEFAULT_PREFIX_PROFILE "/tmp"
class EmulReq {
public:
*/
TrinityEmulAPI::TrinityEmulAPI (int dev_id, dev_type type)
: DriverAPI (dev_id), dev_type_ (type) {
- static char default_prefix_share[] = DEFAULT_PREFIX_SHARE;
- static char default_prefix_profile[] = DEFAULT_PREFIX_PROFILE;
-
- prefix_share_ = getenv (ENV_PREFIX_SHARE);
- if (prefix_share_ == nullptr) {
- prefix_share_ = default_prefix_share;
- setenv (ENV_PREFIX_SHARE, default_prefix_share, 1);
- }
-
- prefix_profile_ = getenv (ENV_PREFIX_PROFILE);
- if (prefix_profile_ == nullptr) {
- prefix_profile_ = default_prefix_profile;
- setenv (ENV_PREFIX_PROFILE, default_prefix_profile, 1);
- }
+ prefix_share_ = Conf::getInstance ().getPrefixShare (); /* share dir prefix is now taken from the Conf singleton */
+ prefix_profile_ = Conf::getInstance ().getPrefixProfile (); /* profile dir prefix is now taken from the Conf singleton */
}
/** @brief destructor of emulation API driver */
#include <iostream>
#include "NPUdrvAPI.h"
+#include <ne-conf.h>
#include <npubinfmt.h>
/* Only 2 devices are supported for now. 8 will be enough value even in future */
return 0;
profile->total_system_cycles = t_profile.total_cycles;
- profile->dram_input_footprint = -1;
- profile->dram_output_footprint = -1;
+ profile->dram_input_footprint = t_profile.input_footprint;
+ profile->dram_output_footprint = t_profile.output_footprint;
+
+ if (is_cuse_) {
+ std::string prof_path = Conf::getInstance ().getPrefixProfile ();
+ prof_path += "/ne_profile." + std::to_string (req_id) + ".rec";
+ profile->prof_path = strdup (prof_path.c_str ());
+ }
profile->num_layers = t_profile.total_ops;
profile->layers = new npu_profile_layer[profile->num_layers];
ret =
ioctl (this->getDeviceFD (), TRINITY_IOCTL_GET_PROFILE_BUFF, &t_profile);
if (ret != 0) {
- delete[](profile->layers);
+ delete[] profile->layers;
profile->layers = nullptr;
profile->num_layers = 0;
call_once (once_flag_, []() { instance_.reset (new Conf); });
instance_->loadConf (inipath);
+
+ /* the below envvars should be set */
+ setenv (ENV_PREFIX_SHARE, instance_->getPrefixShare (), 1);
+ setenv (ENV_PREFIX_PROFILE, instance_->getPrefixProfile (), 1);
+
return *(instance_.get ());
}
Conf::reset () {
setResvMemSize (std::to_string (default_resv_mem_size).c_str ());
setNumThreads (std::to_string (default_num_threads).c_str ());
+ setPrefixShare (default_prefix_share);
+ setPrefixProfile (default_prefix_profile);
setLogLevel (std::to_string (default_log_level).c_str ());
setLogDir (dafault_log_dir);
}
}
/**
+ * @brief Set the configuration of prefix share dir
+ * @param[in] prefix prefix string for share dir
+ */
+void
+Conf::setPrefixShare (const char *prefix) {
+  /* a missing value keeps the current setting untouched */
+  if (prefix == nullptr)
+    return;
+
+  /* reject anything that would not fit together with its terminator */
+  const size_t len = strlen (prefix);
+  if (len >= MAX_DIR_LEN)
+    return;
+
+  /* copy the characters and the trailing NUL in one go */
+  memcpy (prefix_share_, prefix, len + 1);
+}
+
+/**
+ * @brief Set the configuration of prefix profile dir
+ * @param[in] prefix prefix string for profile dir
+ */
+void
+Conf::setPrefixProfile (const char *prefix) {
+  /* a missing value keeps the current setting untouched */
+  if (prefix == nullptr)
+    return;
+
+  /* reject anything that would not fit together with its terminator */
+  const size_t len = strlen (prefix);
+  if (len >= MAX_DIR_LEN)
+    return;
+
+  /* copy the characters and the trailing NUL in one go */
+  memcpy (prefix_profile_, prefix, len + 1);
+}
+
+/**
* @brief Set the configuration of log directory
* @param[in] log_dir config string for log dir
*/
iniparser_getstring (ini, "main:resv_mem_size", nullptr);
const char *num_threads =
iniparser_getstring (ini, "main:num_threads", nullptr);
+ const char *prefix_share =
+ iniparser_getstring (ini, "main:prefix_share", nullptr);
+ const char *prefix_profile =
+ iniparser_getstring (ini, "main:prefix_profile", nullptr);
+
const char *log_dir = iniparser_getstring (ini, "debug:log_dir", nullptr);
const char *log_level = iniparser_getstring (ini, "debug:log_level", nullptr);
setResvMemSize (resv_mem_size);
setNumThreads (num_threads);
setLogLevel (log_level);
+ setPrefixShare (prefix_share);
+ setPrefixProfile (prefix_profile);
}
/**
const char *resv_mem_size = getenv (ENV_RESV_MEM_SIZE);
const char *num_threads = getenv (ENV_NUM_THREADS);
const char *log_level = getenv (ENV_LOG_LEVEL);
+ const char *prefix_share = getenv (ENV_PREFIX_SHARE);
+ const char *prefix_profile = getenv (ENV_PREFIX_PROFILE);
setLogDir (log_dir);
setResvMemSize (resv_mem_size);
setNumThreads (num_threads);
setLogLevel (log_level);
+ setPrefixShare (prefix_share);
+ setPrefixProfile (prefix_profile);
}
#define ENV_LOG_DIR "NE_LOG_DIR"
#define ENV_LOG_LEVEL "NE_LOG_LEVEL"
+#define ENV_PREFIX_SHARE "NE_PREFIX_SHARE"
+#define ENV_PREFIX_PROFILE "NE_PREFIX_PROFILE"
+
static const uint32_t default_num_threads = 8;
static const size_t default_resv_mem_size =
0; /* default mode is the CMA allocation */
static const char dafault_log_dir[] = "/tmp/";
static const uint32_t default_log_level =
1; /* default log level showing error messages only */
+static const char default_prefix_share[] = NE_PREFIX "/share";
+static const char default_prefix_profile[] = "/tmp";
/** @brief Configuration for NPU-Engine components */
class Conf {
size_t getResvMemSize () { return reserved_mem_size_; }
const char *getLogDir () { return log_dir_; }
uint32_t getLogLevel () { return log_level_; }
+ const char *getPrefixShare () { return prefix_share_; }
+ const char *getPrefixProfile () { return prefix_profile_; }
void reset ();
void setNumThreads (const char *str);
void setResvMemSize (const char *str);
+ void setPrefixShare (const char *str);
+ void setPrefixProfile (const char *str);
void setLogDir (const char *str);
void setLogLevel (const char *str);
void setLayerDump (const char *str);
/**< NE_NUM_THREADS, [main] num_threads, the number of threads in thread pool */
size_t reserved_mem_size_;
/**< NE_RESV_MEM_SIZE, [main] resv_mem_size, the size of memory reserved */
+ char prefix_share_[MAX_DIR_LEN];
+ /**< NE_PREFIX_SHARE, [main] prefix_share, the path where share directory is located */
+ char prefix_profile_[MAX_DIR_LEN];
+ /**< NE_PREFIX_PROFILE, [main] prefix_profile, the path where profile data is created */
char log_dir_[MAX_DIR_LEN];
/**< NE_LOG_DIR, [debug] log_dir, the path where log files are created */
uint32_t log_level_;
* @bug No known bugs except for NYI items
*/
-#include <triv2profile.h>
-#include <ne-conf.h>
-#include <ne-utils.h>
-#include <atomic>
-
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
-#include "trinity-cuse.h"
+#include <cstdlib>
+#include <iostream>
+#include <fstream>
+#include <atomic>
+
#include <triv2profile.h>
+#include "trinity-cuse.h"
-#define ENV_PREFIX_SHARE "NE_PREFIX_SHARE"
-#define ENV_PREFIX_PROFILE "NE_PREFIX_PROFILE"
-#define DEFAULT_PREFIX_SHARE NE_PREFIX "/share"
-#define DEFAULT_PREFIX_PROFILE "/tmp"
+#include <ne-utils.h>
#define TRIV2_MAX_SEGMENTS 256
-static const char *prefix_share;
-static const char *prefix_profile;
-
/**
* @brief Emulated Dmabuf Impl.
*/
uint64_t metadata_ext_size_;
};
+/**
+ * @brief Emulated Profile Impl.
+ *
+ * Reads the profile record file at prof_path and converts it into an
+ * npu_profile. The instance owns profile_.layers (allocated with new[])
+ * and profile_.prof_path (allocated with strdup) and frees both when it
+ * is destroyed or re-parsed.
+ */
+class EmulProfile {
+ public:
+  EmulProfile (int req_id, const std::string &prof_path)
+      : req_id_ (req_id), prof_path_ (prof_path), profile_ () {}
+  ~EmulProfile () { release (); }
+
+  /* profile_ holds raw owning pointers; a copy would free them twice */
+  EmulProfile (const EmulProfile &) = delete;
+  EmulProfile &operator= (const EmulProfile &) = delete;
+
+  uint32_t getNumLayers () const { return profile_.num_layers; }
+  int64_t getTotalCycles () const { return profile_.total_system_cycles; }
+  int64_t getInputFootprint () const { return profile_.dram_input_footprint; }
+  int64_t getOutputFootprint () const { return profile_.dram_output_footprint; }
+  void *getData () const { return profile_.layers; }
+  size_t getDataSize () const {
+    return profile_.num_layers * sizeof (npu_profile_layer);
+  }
+
+  /**
+   * @brief Parse the profile record file into per-layer entries
+   * @return true on success; false if the file is missing, truncated,
+   *         has a mismatching format version or an unknown block id.
+   * @note May be called repeatedly; any earlier result is released first
+   *       and nothing is kept when parsing fails.
+   */
+  bool parse () {
+    release ();
+
+    std::ifstream ifs (prof_path_, std::ios::binary);
+    if (!ifs.good ()) {
+      std::cerr << "Failed to find the profile data " << prof_path_ << "\n";
+      return false;
+    }
+
+    profile_.prof_path = strdup (prof_path_.c_str ());
+    if (!profile_.prof_path) {
+      std::cerr << "Unable to duplicate the profile path " << prof_path_
+                << "\n";
+      return false;
+    }
+
+    T2PF_HEAD head;
+    if (!readRecord (ifs, &head))
+      return truncated ();
+
+    if (head.fmt_vesion != TRIV2PROF_FMT_VER) {
+      std::cerr << "Profile data format mismatch: "
+                << "(" << head.fmt_vesion << " vs. " << TRIV2PROF_FMT_VER
+                << ")\n";
+      release ();
+      return false;
+    }
+
+    uint32_t total_dump = 0;
+
+    /* Neural Network Accelerator (NNA) */
+    total_dump += head.nna.num_of_dump;
+    total_dump += head.nna_dma_in.num_of_dump;
+    total_dump += head.nna_dma_out.num_of_dump;
+
+    /* Digital Signal Processor (DSP) */
+    total_dump += head.dsp.num_of_dump;
+    total_dump += head.dsp_dma_in.num_of_dump;
+    total_dump += head.dsp_dma_out.num_of_dump;
+
+    /* nothing was dumped: the zero-initialized profile is the result */
+    if (total_dump == 0)
+      return true;
+
+    profile_.layers = new npu_profile_layer[total_dump];
+    profile_.num_layers = total_dump;
+    profile_.total_system_cycles = head.total_cycles;
+    profile_.dram_input_footprint = head.nna_dma_in.access_footprint_byte;
+    profile_.dram_output_footprint = head.nna_dma_out.access_footprint_byte;
+    profile_.dram_input_footprint += head.dsp_dma_in.access_footprint_byte;
+    profile_.dram_output_footprint += head.dsp_dma_out.access_footprint_byte;
+
+    for (uint32_t i = 0; i < total_dump; i++) {
+      npu_profile_layer *layer = &profile_.layers[i];
+
+      /* peek at the common part to learn the block type, then rewind so
+       * that the block-specific record is read from its beginning */
+      T2PF_DUMP common;
+      const std::streampos pos = ifs.tellg ();
+      if (!readRecord (ifs, &common))
+        return truncated ();
+      ifs.seekg (pos);
+
+      memset (layer, '\x00', sizeof (npu_profile_layer));
+
+      layer->running_cycles = common.cycle_end - common.cycle_start;
+      layer->start_cycles = common.cycle_start;
+      layer->end_cycles = common.cycle_end;
+
+      /* only touch a record after it has been read completely */
+      bool loaded = false;
+
+      switch (common.block_id) {
+        case TRIV2PROF_BLOCKID_NNA: {
+          T2PF_DUMP_NNA nna;
+          loaded = readRecord (ifs, &nna);
+          if (loaded)
+            snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
+          break;
+        }
+        case TRIV2PROF_BLOCKID_NNA_DMA_IN: {
+          T2PF_DUMP_DMA dma;
+          loaded = readRecord (ifs, &dma);
+          if (loaded)
+            setDmaIn (layer, "ADMA_IN", dma);
+          break;
+        }
+        case TRIV2PROF_BLOCKID_NNA_DMA_OUT: {
+          T2PF_DUMP_DMA dma;
+          loaded = readRecord (ifs, &dma);
+          if (loaded)
+            setDmaOut (layer, "ADMA_OUT", dma);
+          break;
+        }
+        case TRIV2PROF_BLOCKID_DSP: {
+          T2PF_DUMP_DSP dsp;
+          loaded = readRecord (ifs, &dsp);
+          if (loaded)
+            snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
+          break;
+        }
+        case TRIV2PROF_BLOCKID_DSP_DMA_IN: {
+          T2PF_DUMP_DMA dma;
+          loaded = readRecord (ifs, &dma);
+          if (loaded)
+            setDmaIn (layer, "PDMA_IN", dma);
+          break;
+        }
+        case TRIV2PROF_BLOCKID_DSP_DMA_OUT: {
+          T2PF_DUMP_DMA dma;
+          loaded = readRecord (ifs, &dma);
+          if (loaded)
+            setDmaOut (layer, "PDMA_OUT", dma);
+          break;
+        }
+        default:
+          std::cerr << "Unknown block id detected: " << common.block_id
+                    << std::endl;
+          release ();
+          return false;
+      }
+
+      if (!loaded)
+        return truncated ();
+    }
+
+    return true;
+  }
+
+ private:
+  /** @brief Read one fixed-size record; false if the stream ran short */
+  template <typename T>
+  static bool readRecord (std::ifstream &ifs, T *record) {
+    ifs.read (reinterpret_cast<char *> (record), sizeof (T));
+    return !ifs.fail ();
+  }
+
+  /** @brief Fill a layer from a DMA record moving data from DRAM to SRAM */
+  static void setDmaIn (npu_profile_layer *layer, const char *name,
+                        const T2PF_DUMP_DMA &dma) {
+    snprintf (layer->name, NPU_OPNAME_MAX, "%s", name);
+    layer->dram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+    layer->sram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+  }
+
+  /** @brief Fill a layer from a DMA record moving data from SRAM to DRAM */
+  static void setDmaOut (npu_profile_layer *layer, const char *name,
+                         const T2PF_DUMP_DMA &dma) {
+    snprintf (layer->name, NPU_OPNAME_MAX, "%s", name);
+    layer->dram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+    layer->sram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+  }
+
+  /** @brief Report a short read, drop partial results and signal failure */
+  bool truncated () {
+    std::cerr << "Truncated profile data " << prof_path_ << "\n";
+    release ();
+    return false;
+  }
+
+  /** @brief Free the owned buffers and reset profile_ to all zeros */
+  void release () {
+    delete[] profile_.layers;
+    /* prof_path comes from strdup, so it must go back through free */
+    free (const_cast<char *> (profile_.prof_path));
+    profile_ = npu_profile ();
+  }
+
+  int req_id_;
+  std::string prof_path_;
+  npu_profile profile_;
+};
+
+/** @brief Global file descriptor */
std::atomic<int> EmulDmabuf::global_fd_ (1);
/** @brief Global mapping of dmabuf */
ThreadSafeMap<int, EmulDmabuf> global_dmabuf_map;
std::atomic<uint64_t> EmulModel::global_id_ (1);
/** @brief Global mapping of models */
ThreadSafeMap<uint64_t, EmulModel> global_model_map;
+/** @brief Global mapping of profile data */
+ThreadSafeMap<int, EmulProfile> global_profile_map;
/**
* @brief TRIV2 impl. of get_version ioctl()
triv2_get_tops (ctx, &tops);
- std::string cmd_path = NE_PREFIX "/share";
+ std::string cmd_path (ctx->prefix_share);
if (tops == 2)
cmd_path += "/mRPsim/triv2_2tops.cmd";
else
cmd_path += "/mRPsim/triv2.cmd";
- std::string prof_path = "/tmp";
- prof_path += "/ne_profile." + std::to_string (in->req_id);
+ int req_id = in->req_id;
+ std::string prof_path (ctx->prefix_profile);
+ prof_path += "/ne_profile." + std::to_string (req_id);
run_triv2_emul (prog, segt, meta, cmd_path.c_str (), prof_path.c_str ());
+ EmulProfile *profile = new EmulProfile (req_id, prof_path + ".rec");
+ global_profile_map.insert (req_id, profile);
+
delete[] segt;
return 0;
}
triv2_get_profile_meta (trinity_cuse_context *ctx,
const struct trinity_ioctl_profile *in,
struct trinity_ioctl_profile *out) {
- /* NYI */
+ EmulProfile *profile = global_profile_map.find (in->req_id);
+ if (profile == nullptr)
+ return -ENOENT;
+
+ if (!profile->parse ())
+ return -EINVAL;
+
*out = *in;
- out->total_ops = 0;
+ out->total_ops = profile->getNumLayers ();
+ out->total_cycles = profile->getTotalCycles ();
+ out->input_footprint = profile->getInputFootprint ();
+ out->output_footprint = profile->getOutputFootprint ();
+ out->profile_size = out->total_ops * sizeof (npu_profile_layer);
+
return 0;
}
static int
triv2_get_profile_buff (trinity_cuse_context *ctx,
const struct trinity_ioctl_profile *in,
- struct trinity_ioctl_profile *out) {
- /* NYI */
+ struct trinity_ioctl_profile *out, void *data) {
+  /* look up the profile stored under this request id */
+  EmulProfile *profile = global_profile_map.find (in->req_id);
+  if (profile == nullptr)
+    return -ENOENT;
+
+  /* the caller sizes 'data' from in->profile_size, so refuse to copy more
+   * than that; also refuse a missing source or destination buffer */
+  const size_t data_size = profile->getDataSize ();
+  if (data_size > 0 && (data == nullptr || profile->getData () == nullptr ||
+                        in->profile_size < data_size))
+    return -EINVAL;
+
*out = *in;
+  if (data_size > 0)
+    memcpy (data, profile->getData (), data_size);
+
+  /* the entry is dropped from the map once its layers were handed over */
+  global_profile_map.remove (in->req_id);
+
return 0;
}
/** @brief Setting the ioctl vtable */
bool
set_ioctl_vtable_triv2 (trinity_cuse_ioctl_vtable **vtable) {
- static char default_prefix_share[] = DEFAULT_PREFIX_SHARE;
- static char default_prefix_profile[] = DEFAULT_PREFIX_PROFILE;
-
if (vtable == nullptr)
return false;
*vtable = &triv2_vtable;
-
- prefix_share = getenv (ENV_PREFIX_SHARE);
- if (prefix_share == nullptr) {
- prefix_share = default_prefix_share;
- setenv (ENV_PREFIX_SHARE, default_prefix_share, 1);
- }
- prefix_profile = getenv (ENV_PREFIX_PROFILE);
- if (prefix_profile == nullptr) {
- prefix_profile = default_prefix_profile;
- setenv (ENV_PREFIX_PROFILE, default_prefix_profile, 1);
- }
-
return true;
}
#include <sys/ioctl.h>
+#include <ne-conf.h>
#include "trinity-cuse.h"
static trinity_cuse_ioctl_vtable *vtable;
ctx = new trinity_cuse_context;
ctx->app_id = fuse_req_ctx (req)->pid;
+ ctx->prefix_share = Conf::getInstance ().getPrefixShare ();
+ ctx->prefix_profile = Conf::getInstance ().getPrefixProfile ();
fi->fh = (uint64_t) ctx;
fuse_reply_open (req, fi);
&out_val, sizeof (out_val));
}
break;
- case TRINITY_IOCTL_GET_PROFILE_BUFF:
- if (!in_size || !out_size) {
- struct iovec iov = {arg, sizeof (struct trinity_ioctl_profile)};
- fuse_reply_ioctl_retry (req, &iov, 1, &iov, 1);
- } else {
- const struct trinity_ioctl_profile *in_val =
- static_cast<const struct trinity_ioctl_profile *> (in_buf);
- struct trinity_ioctl_profile out_val;
- fuse_reply_ioctl (req, vtable->get_profile_buff (ctx, in_val, &out_val),
- &out_val, sizeof (out_val));
+ case TRINITY_IOCTL_GET_PROFILE_BUFF: {
+ struct iovec arg_iov = {arg, sizeof (struct trinity_ioctl_profile)};
+ struct iovec out_iov[2];
+
+ /* read input arg */
+ if (!in_size) {
+ fuse_reply_ioctl_retry (req, &arg_iov, 1, NULL, 0);
+ break;
+ }
+
+ const struct trinity_ioctl_profile *in_arg =
+ static_cast<const struct trinity_ioctl_profile *> (in_buf);
+
+ /* prepare output arg & buffer */
+ if (!out_size) {
+ out_iov[0] = arg_iov;
+ out_iov[1] = {in_arg->profile_buf, in_arg->profile_size};
+ fuse_reply_ioctl_retry (req, &arg_iov, 1, out_iov, 2);
+ break;
}
+
+ struct trinity_ioctl_profile out_arg;
+ char *data = new char[in_arg->profile_size];
+ int ret = vtable->get_profile_buff (ctx, in_arg, &out_arg, data);
+
+ out_iov[0] = {&out_arg, sizeof (struct trinity_ioctl_profile)};
+ out_iov[1] = {data, in_arg->profile_size};
+
+ fuse_reply_ioctl_iov (req, ret, out_iov, 2);
+
+ delete[] data;
break;
+ }
/* NYI */
default:
fuse_reply_err (req, ENOTTY);
typedef struct {
int app_id;
std::mutex mutex;
+ const char *prefix_share; /**< prefix of share directory */
+ const char *prefix_profile; /**< prefix of profile directory */
/* NYI */
} trinity_cuse_context;
struct trinity_ioctl_profile *);
int (*get_profile_buff) (trinity_cuse_context *,
const struct trinity_ioctl_profile *,
- struct trinity_ioctl_profile *);
+ struct trinity_ioctl_profile *, void *);
} trinity_cuse_ioctl_vtable;
#endif