[Filter/Prop] Add performance properties (lat/fps) for tensor filter

author Dongju Chae <dongju.chae@samsung.com>

Wed, 3 Jun 2020 04:01:35 +0000 (13:01 +0900)

committer MyungJoo Ham <myungjoo.ham@samsung.com>

Thu, 4 Jun 2020 07:22:06 +0000 (16:22 +0900)
author Dongju Chae <dongju.chae@samsung.com>
Wed, 3 Jun 2020 04:01:35 +0000 (13:01 +0900)
committer MyungJoo Ham <myungjoo.ham@samsung.com>
Thu, 4 Jun 2020 07:22:06 +0000 (16:22 +0900)
diff --git a/gst/nnstreamer/include/nnstreamer_plugin_api_filter.h b/gst/nnstreamer/include/nnstreamer_plugin_api_filter.h

index 8d4b427..bc673f0 100644 (file)
--- a/gst/nnstreamer/include/nnstreamer_plugin_api_filter.h
+++ b/gst/nnstreamer/include/nnstreamer_plugin_api_filter.h
@@ -157,6 +157,8 @@ typedef struct _GstTensorFilterProperties
      const char *accl_str; /**< accelerator configuration passed in as parameter, use in GstTensorFilterFramework V0 only */
    };
  
+  int latency; /**< The average latency over the recent 10 inferences in microseconds */
+  int throughput; /**< The average throughput in the number of outputs per second */
  } GstTensorFilterProperties;
  
  /**
@@ -236,7 +238,6 @@ typedef struct _GstTensorFilterFrameworkEventData
        accl_hw *hw_list;   /**< accelerators supported by framework intersected with the new user provided accelerator preference */
        int num_hw;         /**< number of hardare accelerators in the hw_list supported by the framework */
      };
-
    };
  } GstTensorFilterFrameworkEventData;
  
diff --git a/gst/nnstreamer/tensor_filter/tensor_filter.c b/gst/nnstreamer/tensor_filter/tensor_filter.c

index bc1492f..b83e065 100644 (file)
--- a/gst/nnstreamer/tensor_filter/tensor_filter.c
+++ b/gst/nnstreamer/tensor_filter/tensor_filter.c
@@ -331,6 +331,101 @@ gst_tensor_filter_destroy_notify (void *data)
  }
  
  /**
+ * @brief Prepare statistics for performance profiling (e.g, latency, throughput)
+ */
+static void
+prepare_statistics (GstTensorFilterPrivate * priv)
+{
+  /*
+   * Use the monotonic clock: this value is only ever subtracted from a later
+   * timestamp in record_statistics (), and wall-clock time may jump
+   * (e.g., NTP or manual adjustment), which would yield bogus latencies.
+   */
+  priv->stat.latest_invoke_time = g_get_monotonic_time ();
+}
+
+/**
+ * @brief Helper function to accumulate latencies (used with g_queue_foreach)
+ * @param[in] data pointer to one queued latency (gint64, usec)
+ * @param[in,out] user_data pointer to the running sum (gint64).
+ *                          The caller must initialize it before iterating.
+ */
+static void
+accumulate_latency (void *data, void *user_data)
+{
+  gint64 *latency = data;
+  gint64 *total_latency = user_data;
+
+  *total_latency += *latency;
+}
+
+/** number of invokes above which the cached (old) totals are subtracted */
+#define THRESHOLD_DROP_OLD  (2000)
+/** number of invokes above which the current totals are cached once */
+#define THRESHOLD_CACHE_OLD (1000)
+
+/**
+ * @brief Record statistics for performance profiling (e.g, latency, throughput)
+ * @param[in,out] priv tensor-filter private data. prepare_statistics () must
+ *                     have been called right before the measured invoke.
+ * @note Updates priv->prop.latency (usec, average over the recent invokes)
+ *       and priv->prop.throughput (outputs per second, multiplied by 1000)
+ *       when the corresponding profiling mode is on. -1 is stored when the
+ *       value does not fit in a 32-bit integer, or (for throughput) when no
+ *       latency has been accumulated yet.
+ */
+static void
+record_statistics (GstTensorFilterPrivate * priv)
+{
+  /* same clock as prepare_statistics () so that the difference is valid */
+  gint64 end_time = g_get_monotonic_time ();
+  gint64 *latency = g_new (gint64, 1);
+  GQueue *recent_latencies = priv->stat.recent_latencies;
+
+  *latency = end_time - priv->stat.latest_invoke_time;
+  priv->stat.total_invoke_latency += *latency;
+  priv->stat.total_invoke_num += 1;
+
+  /* keep at most GST_TF_STAT_MAX_RECENT samples; the queue owns them */
+  if (g_queue_get_length (recent_latencies) == GST_TF_STAT_MAX_RECENT)
+    g_free (g_queue_pop_head (recent_latencies));
+  g_queue_push_tail (recent_latencies, latency);
+
+  /* the queue should have at least one element */
+  g_assert (g_queue_get_length (recent_latencies) != 0);
+
+  if (priv->latency_mode > 0) {
+    /* accumulate_latency () adds into this, so it must start from zero */
+    gint64 avg_latency = 0;
+
+    g_queue_foreach (recent_latencies, accumulate_latency, &avg_latency);
+    avg_latency /= g_queue_get_length (recent_latencies);
+
+    /* check integer overflow */
+    if (avg_latency <= INT32_MAX)
+      priv->prop.latency = (gint) avg_latency;
+    else
+      priv->prop.latency = -1;
+  }
+
+  if (priv->throughput_mode > 0) {
+    gint throughput_int = -1;
+
+    /* avoid division by zero when nothing has been accumulated */
+    if (priv->stat.total_invoke_latency != 0) {
+      gdouble throughput =
+          (gdouble) (priv->stat.total_invoke_num * G_USEC_PER_SEC * 1000) /
+          priv->stat.total_invoke_latency;
+
+      /* check integer overflow */
+      if (throughput <= INT32_MAX)
+        throughput_int = (gint) throughput;
+    }
+
+    /* note that it's a 1000x larger value than actual throughput */
+    priv->prop.throughput = throughput_int;
+  }
+
+  /**
+   * statistics values are monotonously increasing.
+   * to avoid potential overflow, let's cache old values and subtract them
+   * from the statistics if some threshold is exceeded.
+   */
+  if (priv->stat.total_invoke_num > THRESHOLD_DROP_OLD) {
+    priv->stat.total_invoke_latency -= priv->stat.old_total_invoke_latency;
+    priv->stat.total_invoke_num -= priv->stat.old_total_invoke_num;
+    /* drop cached values */
+    priv->stat.old_total_invoke_latency = 0;
+    priv->stat.old_total_invoke_num = 0;
+  } else if (priv->stat.total_invoke_num > THRESHOLD_CACHE_OLD) {
+    /* cache old values if they are not yet set */
+    if (priv->stat.old_total_invoke_num == 0) {
+      priv->stat.old_total_invoke_latency = priv->stat.total_invoke_latency;
+      priv->stat.old_total_invoke_num = priv->stat.total_invoke_num;
+    }
+  }
+}
+
+/**
   * @brief non-ip transform. required vmethod of GstBaseTransform.
   */
  static GstFlowReturn
@@ -349,6 +444,7 @@ gst_tensor_filter_transform (GstBaseTransform * trans,
    guint i, j;
    gint ret;
    gboolean allocate_in_invoke;
+  gboolean need_profiling;
  
    self = GST_TENSOR_FILTER_CAST (trans);
    priv = &self->priv;
@@ -415,9 +511,16 @@ gst_tensor_filter_transform (GstBaseTransform * trans,
      }
    }
  
+  need_profiling = (priv->latency_mode > 0 || priv->throughput_mode > 0);
+  if (TRUE == need_profiling)
+    prepare_statistics (priv);
+
    /* 3. Call the filter-subplugin callback, "invoke" */
    GST_TF_FW_INVOKE_COMPAT (priv, ret, in_tensors, out_tensors);
  
+  if (TRUE == need_profiling)
+    record_statistics (priv);
+
    /* 4. Update result and free map info. */
    for (i = 0; i < prop->output_meta.num_tensors; i++) {
      if (allocate_in_invoke) {
diff --git a/gst/nnstreamer/tensor_filter/tensor_filter_common.c b/gst/nnstreamer/tensor_filter/tensor_filter_common.c

index 537eda1..5154fdd 100644 (file)
--- a/gst/nnstreamer/tensor_filter/tensor_filter_common.c
+++ b/gst/nnstreamer/tensor_filter/tensor_filter_common.c
@@ -109,6 +109,8 @@ enum
    PROP_SUBPLUGINS,
    PROP_ACCELERATOR,
    PROP_IS_UPDATABLE,
+  PROP_LATENCY,
+  PROP_THROUGHPUT,
  };
  
  /**
@@ -394,6 +396,20 @@ gst_tensor_filter_framework_info_init (GstTensorFilterFrameworkInfo * info)
  }
  
  /**
+ * @brief Initialize the GstTensorFilterStatistics object
+ */
+static void
+gst_tensor_filter_statistics_init (GstTensorFilterStatistics * stat)
+{
+  /* nothing has been invoked yet: clear the counters and their cached copies */
+  stat->total_invoke_num = stat->old_total_invoke_num = 0;
+  stat->total_invoke_latency = stat->old_total_invoke_latency = 0;
+  stat->latest_invoke_time = 0;
+
+  /* start with an empty queue of recent latencies */
+  stat->recent_latencies = g_queue_new ();
+}
+
+/**
   * @brief Validate filter sub-plugin's data.
   */
  static gboolean
@@ -674,6 +690,21 @@ gst_tensor_filter_install_properties (GObjectClass * gobject_class)
            "Indicate whether a given model to this tensor filter is "
            "updatable in runtime. (e.g., with on-device training)",
            FALSE, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+  g_object_class_install_property (gobject_class, PROP_LATENCY,
+      g_param_spec_int ("latency", "The average latency",
+          "Turn on performance profiling for the average latency "
+          "over the recent 10 inferences in microseconds. "
+          "Currently, this accepts either 0 (OFF) or 1 (ON).",
+          0 /** min */ , 1 /** max */ , 0 /** default: off */ ,
+          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+  g_object_class_install_property (gobject_class, PROP_THROUGHPUT,
+      g_param_spec_int ("throughput", "The average throughput (FPS)",
+          "Turn on performance profiling for the average throughput "
+          "in the number of outputs per seconds (i.e., FPS), multiplied by 1000 "
+          "to represent a floating point using an integer. "
+          "Currently, this accepts either 0 (OFF) or 1 (ON).",
+          0 /** min */ , 1 /** max */ , 0 /** default: off */ ,
+          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
  }
  
  /**
@@ -685,6 +716,7 @@ gst_tensor_filter_common_init_property (GstTensorFilterPrivate * priv)
    /* init NNFW properties */
    gst_tensor_filter_properties_init (&priv->prop);
    gst_tensor_filter_framework_info_init (&priv->info);
+  gst_tensor_filter_statistics_init (&priv->stat);
  
    /* init internal properties */
    priv->fw = NULL;
@@ -719,6 +751,14 @@ gst_tensor_filter_common_free_property (GstTensorFilterPrivate * priv)
  
    gst_tensors_info_free (&priv->in_config.info);
    gst_tensors_info_free (&priv->out_config.info);
+
+  if (priv->stat.recent_latencies != NULL) {
+    GQueue *queue = priv->stat.recent_latencies;
+    gint64 *latency;
+    while ((latency = g_queue_pop_tail (queue)) != NULL)
+      g_free (latency);
+    g_queue_free (queue);
+  }
  }
  
  /**
@@ -1377,6 +1417,48 @@ _gtfc_setprop_OUTPUTLAYOUT (GstTensorFilterPrivate * priv,
    return 0;
  }
  
+/** @brief Handle "PROP_LATENCY" for set-property */
+static gint
+_gtfc_setprop_LATENCY (GstTensorFilterPrivate * priv,
+    GstTensorFilterProperties * prop, const GValue * value)
+{
+  /* a missing value is ignored */
+  if (value != NULL) {
+    const gint requested = g_value_get_int (value);
+
+    /* only 0 (OFF) and 1 (ON) are accepted; otherwise keep the current mode */
+    if (requested == 0 || requested == 1)
+      priv->latency_mode = requested;
+    else
+      ml_logw ("Invalid argument, nither 0 (OFF) nor 1 (ON).");
+  }
+
+  /* 0 is returned in every case, including a rejected value */
+  return 0;
+}
+
+/** @brief Handle "PROP_THROUGHPUT" for set-property */
+static gint
+_gtfc_setprop_THROUGHPUT (GstTensorFilterPrivate * priv,
+    GstTensorFilterProperties * prop, const GValue * value)
+{
+  /* a missing value is ignored */
+  if (value != NULL) {
+    const gint requested = g_value_get_int (value);
+
+    /* only 0 (OFF) and 1 (ON) are accepted; otherwise keep the current mode */
+    if (requested == 0 || requested == 1)
+      priv->throughput_mode = requested;
+    else
+      ml_logw ("Invalid argument, nither 0 (OFF) nor 1 (ON).");
+  }
+
+  /* 0 is returned in every case, including a rejected value */
+  return 0;
+}
+
  /**
   * @brief Set the properties for tensor_filter
   * @param[in] priv Struct containing the properties of the object
@@ -1439,6 +1521,12 @@ gst_tensor_filter_common_set_property (GstTensorFilterPrivate * priv,
      case PROP_OUTPUTLAYOUT:
        status = _gtfc_setprop_OUTPUTLAYOUT (priv, prop, value);
        break;
+    case PROP_LATENCY:
+      status = _gtfc_setprop_LATENCY (priv, prop, value);
+      break;
+    case PROP_THROUGHPUT:
+      status = _gtfc_setprop_THROUGHPUT (priv, prop, value);
+      break;
      default:
        return FALSE;
    }
@@ -1644,6 +1732,22 @@ gst_tensor_filter_common_get_property (GstTensorFilterPrivate * priv,
          g_value_set_string (value, "");
        }
        break;
+    case PROP_LATENCY:
+      if (priv->latency_mode == 1) {
+        g_value_set_int (value, prop->latency);
+      } else {
+        /* invalid */
+        g_value_set_int (value, -1);
+      }
+      break;
+    case PROP_THROUGHPUT:
+      if (priv->throughput_mode == 1) {
+        g_value_set_int (value, prop->throughput);
+      } else {
+        /* invalid */
+        g_value_set_int (value, -1);
+      }
+      break;
      default:
        /* unknown property */
        return FALSE;
diff --git a/gst/nnstreamer/tensor_filter/tensor_filter_common.h b/gst/nnstreamer/tensor_filter/tensor_filter_common.h

index 190b231..50e07eb 100644 (file)
--- a/gst/nnstreamer/tensor_filter/tensor_filter_common.h
+++ b/gst/nnstreamer/tensor_filter/tensor_filter_common.h
@@ -71,6 +71,21 @@
        } \
      } while (0)
  
+#define GST_TF_STAT_MAX_RECENT (10)
+
+/**
+ * @brief Structure definition for tensor-filter statistics
+ * @details Holds the counters used to derive the latency/throughput
+ *          properties. All time values are in microseconds.
+ */
+typedef struct _GstTensorFilterStatistics
+{
+  gint64 total_invoke_num;      /**< number of total invokes (reduced by old_total_invoke_num once a threshold is exceeded) */
+  gint64 total_invoke_latency;  /**< accumulated invoke latency (usec), reduced together with total_invoke_num */
+  gint64 old_total_invoke_num;      /**< cached value. snapshot of total_invoke_num to subtract later (0 if not set) */
+  gint64 old_total_invoke_latency;  /**< cached value. snapshot of total_invoke_latency to subtract later (usec) */
+  gint64 latest_invoke_time;    /**< the latest invoke time (usec), taken right before invoke */
+  void *recent_latencies;       /**< data structure (e.g., queue) to hold recent latencies; a GQueue of heap-allocated gint64, at most GST_TF_STAT_MAX_RECENT entries */
+} GstTensorFilterStatistics;
+
  /**
   * @brief Structure definition for common tensor-filter properties.
   */
@@ -79,6 +94,7 @@ typedef struct _GstTensorFilterPrivate
    void *privateData; /**< NNFW plugin's private data is stored here */
    GstTensorFilterProperties prop; /**< NNFW plugin's properties */
    GstTensorFilterFrameworkInfo info; /**< NNFW framework info */
+  GstTensorFilterStatistics stat; /**< NNFW plugin's statistics */
    const GstTensorFilterFramework *fw; /**< The implementation core of the NNFW. NULL if not configured */
  
    /* internal properties for tensor-filter */
@@ -87,6 +103,9 @@ typedef struct _GstTensorFilterPrivate
    gboolean is_updatable; /**<  a given model to the filter is updatable if TRUE */
    GstTensorsConfig in_config; /**< input tensor info */
    GstTensorsConfig out_config; /**< output tensor info */
+
+  gint latency_mode;     /**< latency profiling mode (0: off, 1: on, ...) */
+  gint throughput_mode;  /**< throughput profiling mode (0: off, 1: on, ...) */
  } GstTensorFilterPrivate;
  
  /**
author	Dongju Chae <dongju.chae@samsung.com>
	Wed, 3 Jun 2020 04:01:35 +0000 (13:01 +0900)
committer	MyungJoo Ham <myungjoo.ham@samsung.com>
	Thu, 4 Jun 2020 07:22:06 +0000 (16:22 +0900)
gst/nnstreamer/include/nnstreamer_plugin_api_filter.h		patch \| blob \| history
gst/nnstreamer/tensor_filter/tensor_filter.c		patch \| blob \| history
gst/nnstreamer/tensor_filter/tensor_filter_common.c		patch \| blob \| history
gst/nnstreamer/tensor_filter/tensor_filter_common.h		patch \| blob \| history