unclassified->node_id = -1;
unclassified->running_cycles = 0;
+ unclassified->start_cycles = 0;
+ unclassified->end_cycles = 0;
unclassified->dram_read_bytes = 0;
unclassified->dram_write_bytes = 0;
unclassified->sram_read_bytes = 0;
layer->node_id = id;
layer->running_cycles = 0;
+ layer->start_cycles = 0;
+ layer->end_cycles = 0;
layer->dram_read_bytes = 0;
layer->dram_write_bytes = 0;
layer->sram_read_bytes = 0;
/** TODO: for now, values are evenly divided among the fused layers */
layer->running_cycles += profile->layers[i].running_cycles / node_num;
+ if (layer->start_cycles == 0)
+ layer->start_cycles = profile->layers[i].start_cycles;
+ if (layer->end_cycles < profile->layers[i].end_cycles)
+ layer->end_cycles = profile->layers[i].end_cycles;
layer->dram_read_bytes += profile->layers[i].dram_read_bytes / node_num;
layer->dram_write_bytes += profile->layers[i].dram_write_bytes / node_num;
layer->sram_read_bytes += profile->layers[i].sram_read_bytes / node_num;
memset (layer, '\x00', sizeof (npu_profile_layer));
+ layer->running_cycles = common.cycle_end - common.cycle_start;
+ layer->start_cycles = common.cycle_start;
+ layer->end_cycles = common.cycle_end;
+
switch (common.block_id) {
case TRIV2PROF_BLOCKID_NNA:
ifs.read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
- layer->running_cycles = common.cycle_end - common.cycle_start;
break;
case TRIV2PROF_BLOCKID_NNA_DMA_IN:
ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_IN");
- layer->running_cycles = common.cycle_end - common.cycle_start;
layer->dram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
layer->sram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
break;
ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_OUT");
- layer->running_cycles = common.cycle_end - common.cycle_start;
layer->dram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
layer->sram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
break;
ifs.read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
- layer->running_cycles = common.cycle_end - common.cycle_start;
break;
case TRIV2PROF_BLOCKID_DSP_DMA_IN:
ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_IN");
- layer->running_cycles = common.cycle_end - common.cycle_start;
layer->dram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
layer->sram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
break;
ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_OUT");
- layer->running_cycles = common.cycle_end - common.cycle_start;
layer->dram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
layer->sram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
break;
cerr << "[" << i << "] " << profile.layers[i].name << "\n";
if (profile.layers[i].running_cycles > 0)
cerr << "\tRunning Cycles : " << profile.layers[i].running_cycles << "\n";
+ if (profile.layers[i].start_cycles > 0)
+ cerr << "\tStart Cycles : " << profile.layers[i].start_cycles << "\n";
+ if (profile.layers[i].end_cycles > 0)
+ cerr << "\tEnd Cycles : " << profile.layers[i].end_cycles << "\n";
if (profile.layers[i].dram_read_bytes > 0)
cerr << "\tDRAM Read (KB) : " << (profile.layers[i].dram_read_bytes >> 10) << "\n";
if (profile.layers[i].dram_write_bytes > 0)