This patch adds performance properties (latency/throughput) to the tensor filter.
These properties will be used for overlay information that shows the performance
of the tensor filter (e.g., SNPE).
Signed-off-by: Dongju Chae <dongju.chae@samsung.com>
const char *accl_str; /**< accelerator configuration passed in as parameter, use in GstTensorFilterFramework V0 only */
};
+ int latency; /**< The average latency over the recent 10 inferences in microseconds */
+ int throughput; /**< The average throughput in the number of outputs per second */
} GstTensorFilterProperties;
/**
accl_hw *hw_list; /**< accelerators supported by framework intersected with the new user provided accelerator preference */
int num_hw; /**< number of hardare accelerators in the hw_list supported by the framework */
};
-
};
} GstTensorFilterFrameworkEventData;
}
/**
+ * @brief Time-stamp the start of an invoke for performance profiling
+ * @param[in,out] priv Private data of the tensor filter;
+ *                     stat.latest_invoke_time is overwritten with the current time.
+ */
+static void
+prepare_statistics (GstTensorFilterPrivate * priv)
+{
+  const gint64 started_at = g_get_real_time ();
+
+  priv->stat.latest_invoke_time = started_at;
+}
+
+/**
+ * @brief Per-element callback (used with g_queue_foreach) that adds one
+ *        latency sample to a running sum
+ * @param[in] data Pointer to the gint64 latency sample of the visited element
+ * @param[in,out] user_data Pointer to the gint64 sum the sample is added to
+ */
+static void
+accumulate_latency (void *data, void *user_data)
+{
+  const gint64 sample = *(gint64 *) data;
+  gint64 *sum = (gint64 *) user_data;
+
+  *sum = *sum + sample;
+}
+
+#define THRESHOLD_DROP_OLD (2000) /**< once total_invoke_num exceeds this, the cached old totals are subtracted from the totals */
+#define THRESHOLD_CACHE_OLD (1000) /**< once total_invoke_num exceeds this, the current totals are cached as old values (if not cached yet) */
+
+/**
+ * @brief Record statistics for performance profiling (e.g, latency, throughput)
+ * @param[in,out] priv Private data of the tensor filter. The stat member is
+ *                     updated, and prop.latency / prop.throughput are
+ *                     refreshed when the matching profiling mode is on.
+ *
+ * The latency of the finished invoke is measured against
+ * stat.latest_invoke_time, so prepare_statistics() has to be called right
+ * before the invoke.
+ *
+ * NOTE(review): both time stamps come from g_get_real_time(), i.e. the
+ * wall-clock; a clock adjustment during an invoke would distort (or negate)
+ * the measured latency. Consider g_get_monotonic_time() on both sides.
+ */
+static void
+record_statistics (GstTensorFilterPrivate * priv)
+{
+  gint64 end_time = g_get_real_time ();
+  gint64 *latency = g_new (gint64, 1);
+  GQueue *recent_latencies = priv->stat.recent_latencies;
+
+  *latency = end_time - priv->stat.latest_invoke_time;
+  priv->stat.total_invoke_latency += *latency;
+  priv->stat.total_invoke_num += 1;
+
+  /* keep at most GST_TF_STAT_MAX_RECENT samples; the queue owns each of them */
+  if (g_queue_get_length (recent_latencies) == GST_TF_STAT_MAX_RECENT)
+    g_free (g_queue_pop_head (recent_latencies));
+  g_queue_push_tail (recent_latencies, latency);
+
+  /* the queue should have at least one element */
+  g_assert (g_queue_get_length (recent_latencies) != 0);
+
+  if (priv->latency_mode > 0) {
+    /* has to start from zero: accumulate_latency() only adds to this sum */
+    gint64 avg_latency = 0;
+
+    g_queue_foreach (recent_latencies, accumulate_latency, &avg_latency);
+    avg_latency /= g_queue_get_length (recent_latencies);
+
+    /* check integer overflow; -1 is reported when the value does not fit */
+    if (avg_latency <= INT32_MAX)
+      priv->prop.latency = (gint) avg_latency;
+    else
+      priv->prop.latency = -1;
+  }
+
+  if (priv->throughput_mode > 0) {
+    gint throughput_int = -1;
+
+    /* computed from the accumulated totals, not from the recent samples */
+    if (priv->stat.total_invoke_latency != 0) {
+      gdouble throughput =
+          (gdouble) (priv->stat.total_invoke_num * G_USEC_PER_SEC * 1000) /
+          priv->stat.total_invoke_latency;
+
+      /* check integer overflow; -1 is reported when the value does not fit */
+      if (throughput <= INT32_MAX)
+        throughput_int = (gint) throughput;
+    }
+
+    /* note that it's a 1000x larger value than actual throughput */
+    priv->prop.throughput = throughput_int;
+  }
+
+  /**
+   * statistics values are monotonically increasing.
+   * to avoid potential overflow, let's cache old values and subtract them
+   * from the statistics if some threshold is exceeded.
+   */
+  if (priv->stat.total_invoke_num > THRESHOLD_DROP_OLD) {
+    priv->stat.total_invoke_latency -= priv->stat.old_total_invoke_latency;
+    priv->stat.total_invoke_num -= priv->stat.old_total_invoke_num;
+    /* drop cached values */
+    priv->stat.old_total_invoke_latency = 0;
+    priv->stat.old_total_invoke_num = 0;
+  } else if (priv->stat.total_invoke_num > THRESHOLD_CACHE_OLD) {
+    /* cache old values if they are not yet set */
+    if (priv->stat.old_total_invoke_num == 0) {
+      priv->stat.old_total_invoke_latency = priv->stat.total_invoke_latency;
+      priv->stat.old_total_invoke_num = priv->stat.total_invoke_num;
+    }
+  }
+}
+
+/**
* @brief non-ip transform. required vmethod of GstBaseTransform.
*/
static GstFlowReturn
guint i, j;
gint ret;
gboolean allocate_in_invoke;
+ gboolean need_profiling;
self = GST_TENSOR_FILTER_CAST (trans);
priv = &self->priv;
}
}
+ need_profiling = (priv->latency_mode > 0 || priv->throughput_mode > 0);
+ if (TRUE == need_profiling)
+ prepare_statistics (priv);
+
/* 3. Call the filter-subplugin callback, "invoke" */
GST_TF_FW_INVOKE_COMPAT (priv, ret, in_tensors, out_tensors);
+ if (TRUE == need_profiling)
+ record_statistics (priv);
+
/* 4. Update result and free map info. */
for (i = 0; i < prop->output_meta.num_tensors; i++) {
if (allocate_in_invoke) {
PROP_SUBPLUGINS,
PROP_ACCELERATOR,
PROP_IS_UPDATABLE,
+ PROP_LATENCY,
+ PROP_THROUGHPUT,
};
/**
}
/**
+ * @brief Initialize the GstTensorFilterStatistics object
+ * @param[out] stat Statistics object to reset. All counters and the time
+ *                  stamp become zero and a new, empty queue is created for
+ *                  the recent latencies (a previous queue pointer, if any,
+ *                  is overwritten without being freed).
+ */
+static void
+gst_tensor_filter_statistics_init (GstTensorFilterStatistics * stat)
+{
+  /* no invoke has been time-stamped yet */
+  stat->latest_invoke_time = 0;
+
+  /* running totals and their cached counterparts */
+  stat->total_invoke_latency = stat->old_total_invoke_latency = 0;
+  stat->total_invoke_num = stat->old_total_invoke_num = 0;
+
+  /* empty container for the recent latency samples */
+  stat->recent_latencies = g_queue_new ();
+}
+
+/**
* @brief Validate filter sub-plugin's data.
*/
static gboolean
"Indicate whether a given model to this tensor filter is "
"updatable in runtime. (e.g., with on-device training)",
FALSE, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+ g_object_class_install_property (gobject_class, PROP_LATENCY,
+ g_param_spec_int ("latency", "The average latency",
+ "Turn on performance profiling for the average latency "
+ "over the recent 10 inferences in microseconds. "
+ "Currently, this accepts either 0 (OFF) or 1 (ON).",
+ 0 /** min */ , 1 /** max */ , 0 /** default: off */ ,
+ G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+ g_object_class_install_property (gobject_class, PROP_THROUGHPUT,
+ g_param_spec_int ("throughput", "The average throughput (FPS)",
+ "Turn on performance profiling for the average throughput "
+ "in the number of outputs per seconds (i.e., FPS), multiplied by 1000 "
+ "to represent a floating point using an integer. "
+ "Currently, this accepts either 0 (OFF) or 1 (ON).",
+ 0 /** min */ , 1 /** max */ , 0 /** default: off */ ,
+ G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
}
/**
/* init NNFW properties */
gst_tensor_filter_properties_init (&priv->prop);
gst_tensor_filter_framework_info_init (&priv->info);
+ gst_tensor_filter_statistics_init (&priv->stat);
/* init internal properties */
priv->fw = NULL;
gst_tensors_info_free (&priv->in_config.info);
gst_tensors_info_free (&priv->out_config.info);
+
+ if (priv->stat.recent_latencies != NULL) {
+ GQueue *queue = priv->stat.recent_latencies;
+ gint64 *latency;
+ while ((latency = g_queue_pop_tail (queue)) != NULL)
+ g_free (latency);
+ g_queue_free (queue);
+ }
}
/**
return 0;
}
+/**
+ * @brief Handle "PROP_LATENCY" for set-property
+ * @param[in,out] priv Private data whose latency_mode is updated
+ * @param[in] prop Not used by this handler
+ * @param[in] value GValue holding the requested mode as int; may be NULL
+ * @return Always 0. A NULL value or a mode other than 0/1 leaves
+ *         latency_mode untouched (the latter also logs a warning).
+ */
+static gint
+_gtfc_setprop_LATENCY (GstTensorFilterPrivate * priv,
+    GstTensorFilterProperties * prop, const GValue * value)
+{
+  if (value != NULL) {
+    const gint requested = g_value_get_int (value);
+
+    if (requested == 0 || requested == 1)
+      priv->latency_mode = requested;
+    else
+      ml_logw ("Invalid argument, nither 0 (OFF) nor 1 (ON).");
+  }
+
+  return 0;
+}
+
+/**
+ * @brief Handle "PROP_THROUGHPUT" for set-property
+ * @param[in,out] priv Private data whose throughput_mode is updated
+ * @param[in] prop Not used by this handler
+ * @param[in] value GValue holding the requested mode as int; may be NULL
+ * @return Always 0. A NULL value or a mode other than 0/1 leaves
+ *         throughput_mode untouched (the latter also logs a warning).
+ */
+static gint
+_gtfc_setprop_THROUGHPUT (GstTensorFilterPrivate * priv,
+    GstTensorFilterProperties * prop, const GValue * value)
+{
+  if (value != NULL) {
+    const gint requested = g_value_get_int (value);
+
+    if (requested == 0 || requested == 1)
+      priv->throughput_mode = requested;
+    else
+      ml_logw ("Invalid argument, nither 0 (OFF) nor 1 (ON).");
+  }
+
+  return 0;
+}
+
/**
* @brief Set the properties for tensor_filter
* @param[in] priv Struct containing the properties of the object
case PROP_OUTPUTLAYOUT:
status = _gtfc_setprop_OUTPUTLAYOUT (priv, prop, value);
break;
+ case PROP_LATENCY:
+ status = _gtfc_setprop_LATENCY (priv, prop, value);
+ break;
+ case PROP_THROUGHPUT:
+ status = _gtfc_setprop_THROUGHPUT (priv, prop, value);
+ break;
default:
return FALSE;
}
g_value_set_string (value, "");
}
break;
+ case PROP_LATENCY:
+ if (priv->latency_mode == 1) {
+ g_value_set_int (value, prop->latency);
+ } else {
+ /* invalid */
+ g_value_set_int (value, -1);
+ }
+ break;
+ case PROP_THROUGHPUT:
+ if (priv->throughput_mode == 1) {
+ g_value_set_int (value, prop->throughput);
+ } else {
+ /* invalid */
+ g_value_set_int (value, -1);
+ }
+ break;
default:
/* unknown property */
return FALSE;
} \
} while (0)
+#define GST_TF_STAT_MAX_RECENT (10) /**< max number of recent latency samples kept for the average latency */
+
+/**
+ * @brief Structure definition for tensor-filter statistics
+ *
+ * The totals feed the throughput value and the recent latencies feed the
+ * average latency. The old_* members hold a cached copy of the totals that
+ * is subtracted later so that the totals do not grow without bound.
+ */
+typedef struct _GstTensorFilterStatistics
+{
+ gint64 total_invoke_num; /**< number of invokes currently accounted for in the totals */
+ gint64 total_invoke_latency; /**< accumulated invoke latency (usec) of those invokes */
+ gint64 old_total_invoke_num; /**< cached value. number of total invokes (0 if nothing is cached) */
+ gint64 old_total_invoke_latency; /**< cached value. accumulated invoke latency (usec) */
+ gint64 latest_invoke_time; /**< time stamp taken before the latest invoke (usec) */
+ void *recent_latencies; /**< GQueue of heap-allocated gint64 latencies (usec), at most GST_TF_STAT_MAX_RECENT entries */
+} GstTensorFilterStatistics;
+
/**
* @brief Structure definition for common tensor-filter properties.
*/
void *privateData; /**< NNFW plugin's private data is stored here */
GstTensorFilterProperties prop; /**< NNFW plugin's properties */
GstTensorFilterFrameworkInfo info; /**< NNFW framework info */
+ GstTensorFilterStatistics stat; /**< NNFW plugin's statistics */
const GstTensorFilterFramework *fw; /**< The implementation core of the NNFW. NULL if not configured */
/* internal properties for tensor-filter */
gboolean is_updatable; /**< a given model to the filter is updatable if TRUE */
GstTensorsConfig in_config; /**< input tensor info */
GstTensorsConfig out_config; /**< output tensor info */
+
+ gint latency_mode; /**< latency profiling mode (0: off, 1: on, ...) */
+ gint throughput_mode; /**< throughput profiling mode (0: off, 1: on, ...) */
} GstTensorFilterPrivate;
/**