if (node_num > 0) {
uint32_t *node_ids = (uint32_t *) (meta_profile->entry_data + pos);
-
+ int64_t num = static_cast<int64_t>(node_num);
for (uint32_t j = 0; j < node_num; j++) {
uint32_t node_id = node_ids[j];
auto it = node_table.find (node_id);
npu_profile_layer *layer = it->second;
/** TODO: evenly divided to fused layers */
- layer->running_cycles += profile->layers[i].running_cycles / node_num;
+ layer->running_cycles += profile->layers[i].running_cycles / num;
if (layer->start_cycles == 0)
layer->start_cycles = profile->layers[i].start_cycles;
if (layer->end_cycles < profile->layers[i].end_cycles)
layer->end_cycles = profile->layers[i].end_cycles;
- layer->dram_read_bytes += profile->layers[i].dram_read_bytes / node_num;
- layer->dram_write_bytes += profile->layers[i].dram_write_bytes / node_num;
- layer->sram_read_bytes += profile->layers[i].sram_read_bytes / node_num;
- layer->sram_write_bytes += profile->layers[i].sram_write_bytes / node_num;
+ layer->dram_read_bytes += profile->layers[i].dram_read_bytes / num;
+ layer->dram_write_bytes += profile->layers[i].dram_write_bytes / num;
+ layer->sram_read_bytes += profile->layers[i].sram_read_bytes / num;
+ layer->sram_write_bytes += profile->layers[i].sram_write_bytes / num;
layer->visa_exec_seq = -1;
} else {
std::cerr << "Unable to find the node ID " << node_id << std::endl;
total_dump += head.dsp_dma_in.num_of_dump;
total_dump += head.dsp_dma_out.num_of_dump;
- if (total_dump > 0) {
+ if (total_dump > 0 && total_dump < UINT32_MAX) {
profile_.layers = new npu_profile_layer[total_dump];
profile_.num_layers = total_dump;
profile_.total_system_cycles = head.total_cycles;
- profile_.dram_input_footprint = head.nna_dma_in.access_footprint_byte;
- profile_.dram_output_footprint = head.nna_dma_out.access_footprint_byte;
- profile_.dram_input_footprint += head.dsp_dma_in.access_footprint_byte;
- profile_.dram_output_footprint += head.dsp_dma_out.access_footprint_byte;
+ profile_.dram_input_footprint = static_cast<int64_t>(head.nna_dma_in.access_footprint_byte);
+ profile_.dram_output_footprint = static_cast<int64_t>(head.nna_dma_out.access_footprint_byte);
+ profile_.dram_input_footprint += static_cast<int64_t>(head.dsp_dma_in.access_footprint_byte);
+ profile_.dram_output_footprint += static_cast<int64_t>(head.dsp_dma_out.access_footprint_byte);
for (uint32_t i = 0; i < total_dump; i++) {
npu_profile_layer *layer = &profile_.layers[i];
layer->running_cycles = common.cycle_end - common.cycle_start;
layer->start_cycles = common.cycle_start;
layer->end_cycles = common.cycle_end;
- layer->visa_prog_seq = i;
+ layer->visa_prog_seq = static_cast<int64_t>(i);
layer->visa_exec_seq = exec_seq_++;
switch (common.block_id) {
layer->visa_opcode = 0x02;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_IN");
- layer->dram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
- layer->sram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
+ layer->dram_read_bytes = static_cast<int64_t>(nna_dma.src_addr_end - nna_dma.src_addr_start);
+ layer->sram_write_bytes = static_cast<int64_t>(nna_dma.dest_addr_end - nna_dma.dest_addr_start);
break;
case TRIV2PROF_BLOCKID_NNA_DMA_OUT:
ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
layer->visa_opcode = 0x03;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_OUT");
- layer->dram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
- layer->sram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
+ layer->dram_write_bytes = static_cast<int64_t>(nna_dma.dest_addr_end - nna_dma.dest_addr_start);
+ layer->sram_read_bytes = static_cast<int64_t>(nna_dma.src_addr_end - nna_dma.src_addr_start);
break;
case TRIV2PROF_BLOCKID_DSP:
ifs.read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
layer->visa_opcode = 0x40;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_IN");
- layer->dram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
- layer->sram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
+ layer->dram_read_bytes = static_cast<int64_t>(dsp_dma.src_addr_end - dsp_dma.src_addr_start);
+ layer->sram_write_bytes = static_cast<int64_t>(dsp_dma.dest_addr_end - dsp_dma.dest_addr_start);
break;
case TRIV2PROF_BLOCKID_DSP_DMA_OUT:
ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
layer->visa_opcode = 0x41;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_OUT");
- layer->dram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
- layer->sram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
+ layer->dram_write_bytes = static_cast<int64_t>(dsp_dma.dest_addr_end - dsp_dma.dest_addr_start);
+ layer->sram_read_bytes = static_cast<int64_t>(dsp_dma.src_addr_end - dsp_dma.src_addr_start);
break;
default:
std::cerr << "Unknown block id detected: " << common.block_id << std::endl;
/* Digital Signal Processor (DSP) */
total_dump += head.dsp.num_of_dump;
- if (total_dump > 0) {
+ if (total_dump > 0 && total_dump < UINT32_MAX) {
profile_.layers = new npu_profile_layer[total_dump];
profile_.num_layers = total_dump;
profile_.total_system_cycles = head.total_cycles;
- profile_.dram_input_footprint = head.dma_in.access_footprint_byte;
- profile_.dram_output_footprint = head.dma_out.access_footprint_byte;
+ profile_.dram_input_footprint = static_cast<int64_t>(head.dma_in.access_footprint_byte);
+ profile_.dram_output_footprint = static_cast<int64_t>(head.dma_out.access_footprint_byte);
for (uint32_t i = 0; i < total_dump; i++) {
npu_profile_layer *layer = &profile_.layers[i];
layer->running_cycles = common.cycle_end - common.cycle_start;
layer->start_cycles = common.cycle_start;
layer->end_cycles = common.cycle_end;
- layer->visa_prog_seq = i;
+ layer->visa_prog_seq = static_cast<int64_t>(i);
layer->visa_exec_seq = exec_seq_++;
switch (common.block_id) {
layer->visa_opcode = 0x02;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_IN");
- layer->dram_read_bytes = dma.src_addr_end - dma.src_addr_start;
- layer->sram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+ layer->dram_read_bytes = static_cast<int64_t>(dma.src_addr_end - dma.src_addr_start);
+ layer->sram_write_bytes = static_cast<int64_t>(dma.dest_addr_end - dma.dest_addr_start);
break;
case TRIV2PROF_BLOCKID_DMA_OUT:
ifs.read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
layer->visa_opcode = 0x03;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_OUT");
- layer->dram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
- layer->sram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+ layer->dram_write_bytes = static_cast<int64_t>(dma.dest_addr_end - dma.dest_addr_start);
+ layer->sram_read_bytes = static_cast<int64_t>(dma.src_addr_end - dma.src_addr_start);
break;
case TRIV2PROF_BLOCKID_DSP:
ifs.read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));