#define MAX_RANK (4) /* number of entries walked in a tensor's dims[] array */
#define MAX_SEGMENTS (256) /* 8-bit segment indexing */
#define DATA_GRANULARITY (64) /* MPA_L */
+#define DATA_GRANULARITY_SHIFT (6) /* log2 of DATA_GRANULARITY (1 << 6 == 64); keep both in sync */
/* npubinfmt magiccode macros */
#define NPUBIN_MAGICCODE (0x53524E5055000000ULL) /* ASCII hex for 'SRNPU' */
return -EPERM;
}
- if (model_buf == nullptr || model_ptr == nullptr)
- return -EINVAL;
+ if (model_buf == nullptr || model_ptr == nullptr)
+ return -EINVAL;
Model *model;
int status;
output.bufs[idx].type = BUFFER_MAPPED;
output.bufs[idx].size = output_tensor_size;
/** user needs to free this */
- output.bufs[idx].addr = malloc (output_tensor_size);
+ output.bufs[idx].addr = calloc (1, output_tensor_size);
auto func = std::bind (TrinityVision2::manipulateData, model, idx, false,
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3);
int status = comm_.insertGenericBuffer (
segt->getOutputSegment(idx)->getData() + segt->getOutputSegmentOffset(idx),
&output.bufs[idx], func);
+
if (status != 0) {
logerr (TAG, "Failed to return output buffer: %d\n", status);
}
/** special handling for TRIV */
if (layout == DATA_LAYOUT_SRNPU && dims[3] != 3 &&
dims[3] % DATA_GRANULARITY != 0) {
- tensor_size *= (1 + dims[3] / DATA_GRANULARITY);
+ uint32_t depth;
+
+ depth = (dims[3] + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
+ depth = depth * DATA_GRANULARITY;
+
+ tensor_size /= dims[3];
+ tensor_size *= depth;
}
return tensor_size;
/** special handling for TRIV */
if (layout == DATA_LAYOUT_SRNPU && dims[3] != 3 &&
dims[3] % DATA_GRANULARITY != 0) {
- tensor_size *= (1 + dims[3] / DATA_GRANULARITY);
+ uint32_t depth;
+
+ depth = (dims[3] + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
+ depth = depth * DATA_GRANULARITY;
+
+ tensor_size /= dims[3];
+ tensor_size *= depth;
}
return tensor_size;
for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
tensor_size *= dims[rank_idx];
+ /** special handling for TRIV2 */
+ if (layout == DATA_LAYOUT_TRIV2 && dims[3] != 3 &&
+ dims[3] % DATA_GRANULARITY != 0) {
+ uint32_t depth;
+
+ depth = (dims[3] + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
+ depth = depth * DATA_GRANULARITY;
+
+ tensor_size /= dims[3];
+ tensor_size *= depth;
+ }
+
return tensor_size;
}
for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
tensor_size *= dims[rank_idx];
+ /** special handling for TRIV2 */
+ if (layout == DATA_LAYOUT_TRIV2 && dims[3] != 3 &&
+ dims[3] % DATA_GRANULARITY != 0) {
+ uint32_t depth;
+
+ depth = (dims[3] + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
+ depth = depth * DATA_GRANULARITY;
+
+ tensor_size /= dims[3];
+ tensor_size *= depth;
+ }
+
return tensor_size;
}
/**
 * @brief Destructor of TrinityEmulAPI.
 * @details Empties both member maps (elem_map_ and stat_map_) on teardown.
 */
TrinityEmulAPI::~TrinityEmulAPI ()
{
elem_map_.clear();
+ stat_map_.clear();
}
/**
for (uint32_t i = 0; i < data.input_num; i++) {
data.input_offsets[i] = 100 * (i + 1);
data.input_elem_size[i] = 1 * (i + 1);
- for (uint32_t j = 0; j < MAX_RANK; j++)
- data.input_dims[i][j] = 2 * (i + 1);
+ for (uint32_t j = 0; j < MAX_RANK - 1; j++)
+ data.input_dims[i][j] = 1 * (i + 1);
+ data.input_dims[i][MAX_RANK - 1] = DATA_GRANULARITY;
data.input_quant_z[i] = 3 * (i + 1);
data.input_quant_s[i] = 4 * (i + 1);
}
for (uint32_t i = 0; i < data.output_num; i++) {
data.output_offsets[i] = 100 * (i + 1);
data.output_elem_size[i] = 1 * (i + 1);
- for (uint32_t j = 0; j < MAX_RANK; j++)
- data.output_dims[i][j] = 2 * (i + 1);
+ for (uint32_t j = 0; j < MAX_RANK -1; j++)
+ data.output_dims[i][j] = 1 * (i + 1);
+ data.output_dims[i][MAX_RANK - 1] = DATA_GRANULARITY;
data.output_quant_z[i] = 3 * (i + 1);
data.output_quant_s[i] = 4 * (i + 1);
}
meta.input_offsets[i] = 0;
meta.output_offsets[i] = 0;
- for (uint32_t j = 0; j < MAX_RANK; j++) {
- meta.input_dims[i][j] = 8;
- meta.output_dims[i][j] = 8;
+ for (uint32_t j = 0; j < MAX_RANK - 1; j++) {
+ meta.input_dims[i][j] = 2;
+ meta.output_dims[i][j] = 2;
}
+ meta.input_dims[i][MAX_RANK - 1] = DATA_GRANULARITY;
+ meta.output_dims[i][MAX_RANK - 1] = DATA_GRANULARITY;
}
/* no instructions */
meta.program_size = 0;