#define NPU_PROFILE_SIZE (256)
#define NPU_OPNAME_MAX (128)
+/**
+ * Note that the structure below is shared among kernel/decoder and emulation code.
+ * Please make sure any updates do not break the existing SW stack.
+ */
typedef struct {
union {
struct {
- char name[NPU_OPNAME_MAX]; /**< node name (null-terminated) */
- int32_t node_id; /**< node id ('-1' means unclassified node) */
+ char name[NPU_OPNAME_MAX]; /**< node/vISA name (null-terminated) */
int64_t running_cycles;
int64_t sram_read_bytes;
int64_t sram_write_bytes;
- int64_t visa_exec_seq; /**< vISA global execution sequence */
-
int64_t start_cycles;
int64_t end_cycles;
+ union {
+ /* layer-level profiling specific info. */
+ struct {
+ int32_t node_id; /**< node id ('-1' means unclassified node) */
+ };
+ /* vISA-level profiling specific info. */
+ struct {
+ uint32_t visa_opcode; /**< vISA opcode */
+ int64_t visa_prog_seq; /**< vISA program sequence */
+ int64_t visa_exec_seq; /**< vISA execution sequence (global) */
+ };
+ };
/** TODO: Add more info */
} __attribute__ ((packed, aligned));
char reserved[NPU_PROFILE_SIZE];
#include <ne-conf.h>
#define MAX_EMUL_DEVICES (3)
+static uint64_t global_exec_seq = 0;
class EmulReq {
public:
layer->running_cycles = common.cycle_end - common.cycle_start;
layer->start_cycles = common.cycle_start;
layer->end_cycles = common.cycle_end;
+ layer->visa_opcode = 0; /* NYI */
+ layer->visa_prog_seq = i;
+ /* In the first run, program sequence == exec sequence */
+ layer->visa_exec_seq = global_exec_seq++;
switch (common.block_id) {
case TRIV2PROF_BLOCKID_NNA: