habanalabs: added open_stats info ioctl
author Yuri Nudelman <ynudelman@habana.ai>
Mon, 24 May 2021 08:25:21 +0000 (11:25 +0300)
committer Oded Gabbay <ogabbay@kernel.org>
Fri, 18 Jun 2021 12:23:42 +0000 (15:23 +0300)
In a system with multiple ASICs, there is a need to provide monitoring
tools with information on how long a device was opened and how many
times a device was opened.

Therefore, we add a new opcode to the INFO ioctl to provide that
information.

Signed-off-by: Yuri Nudelman <ynudelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/common/habanalabs_ioctl.c
include/uapi/misc/habanalabs.h

index e56f517..37ce38d 100644 (file)
@@ -132,6 +132,9 @@ static int hl_device_release(struct inode *inode, struct file *filp)
                dev_warn(hdev->dev,
                        "Device is still in use because there are live CS and/or memory mappings\n");
 
+       hdev->last_open_session_duration_jif =
+               jiffies - hdev->last_successful_open_jif;
+
        return 0;
 }
 
index 244fbf2..6c9a81c 100644 (file)
@@ -2137,6 +2137,11 @@ struct hl_mmu_funcs {
  *                          the error will be ignored by the driver during
  *                          device initialization. Mainly used to debug and
  *                          workaround firmware bugs
+ * @last_successful_open_jif: timestamp (jiffies) of the last successful
+ *                            device open.
+ * @last_open_session_duration_jif: duration (jiffies) of the last device open
+ *                                  session.
+ * @open_counter: number of successful device open operations.
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
  * @card_type: Various ASICs have several card types. This indicates the card
@@ -2259,6 +2264,9 @@ struct hl_device {
        u64                             max_power;
        u64                             clock_gating_mask;
        u64                             boot_error_status_mask;
+       u64                             last_successful_open_jif;
+       u64                             last_open_session_duration_jif;
+       u64                             open_counter;
        atomic_t                        in_reset;
        enum hl_pll_frequency           curr_pll_profile;
        enum cpucp_card_types           card_type;
index 4d377a3..4194cda 100644 (file)
@@ -187,6 +187,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
        hl_debugfs_add_file(hpriv);
 
+       hdev->open_counter++;
+       hdev->last_successful_open_jif = jiffies;
+
        return 0;
 
 out_err:
index 6604d30..f4dda7b 100644 (file)
@@ -460,6 +460,24 @@ static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
                min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0;
 }
 
+static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+       struct hl_open_stats_info stats = {0};
+       struct hl_device *hdev = hpriv->hdev;
+       u32 max_size = args->return_size;
+
+       /* A NULL or zero-sized user buffer cannot receive the statistics */
+       if (!out || !max_size)
+               return -EINVAL;
+
+       stats.open_counter = hdev->open_counter;
+       stats.last_open_period_ms =
+               jiffies64_to_msecs(hdev->last_open_session_duration_jif);
+       return copy_to_user(out, &stats,
+               min((size_t) max_size, sizeof(stats))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
                                struct device *dev)
 {
@@ -543,6 +561,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
        case HL_INFO_POWER:
                return power_info(hpriv, args);
 
+       case HL_INFO_OPEN_STATS:
+               return open_stats_info(hpriv, args);
+
        default:
                dev_err(dev, "Invalid request %d\n", args->op);
                rc = -ENOTTY;
index a47485a..a47a731 100644 (file)
@@ -313,6 +313,7 @@ enum hl_device_status {
  * HL_INFO_SYNC_MANAGER  - Retrieve sync manager info per dcore
  * HL_INFO_TOTAL_ENERGY  - Retrieve total energy consumption
  * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
+ * HL_INFO_OPEN_STATS    - Retrieve info regarding recent device open calls
  */
 #define HL_INFO_HW_IP_INFO             0
 #define HL_INFO_HW_EVENTS              1
@@ -331,6 +332,7 @@ enum hl_device_status {
 #define HL_INFO_TOTAL_ENERGY           15
 #define HL_INFO_PLL_FREQUENCY          16
 #define HL_INFO_POWER                  17
+#define HL_INFO_OPEN_STATS             18
 
 #define HL_INFO_VERSION_MAX_LEN        128
 #define HL_INFO_CARD_NAME_MAX_LEN      16
@@ -445,6 +447,16 @@ struct hl_pll_frequency_info {
 };
 
 /**
+ * struct hl_open_stats_info - device open statistics returned to user space
+ * @open_counter: number of successful device opens; incremented on each one
+ * @last_open_period_ms: duration (ms) of the most recently closed open session
+ */
+struct hl_open_stats_info {
+       __u64 open_counter;
+       __u64 last_open_period_ms;
+};
+
+/**
  * struct hl_power_info - power information
  * @power: power consumption
  */