habanalabs: rename error info structure
authorDani Liberman <dliberman@habana.ai>
Wed, 7 Sep 2022 13:15:39 +0000 (16:15 +0300)
committerOded Gabbay <ogabbay@kernel.org>
Mon, 19 Sep 2022 12:08:39 +0000 (15:08 +0300)
As preparation for adding more error types to the error info structure,
rename it (and the hl_device member that holds it) to a more suitable name.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/command_submission.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/common/habanalabs_ioctl.c
drivers/misc/habanalabs/gaudi/gaudi.c

index 746b688..fbe5003 100644 (file)
@@ -826,10 +826,10 @@ static void cs_timedout(struct work_struct *work)
        }
 
        /* Save only the first CS timeout parameters */
-       rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0);
+       rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
        if (rc) {
-               hdev->last_error.cs_timeout.timestamp = ktime_get();
-               hdev->last_error.cs_timeout.seq = cs->sequence;
+               hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
+               hdev->captured_err_info.cs_timeout.seq = cs->sequence;
 
                event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
                                HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
index 9c2123d..44050d4 100644 (file)
@@ -2981,12 +2981,12 @@ struct undefined_opcode_info {
 };
 
 /**
- * struct last_error_session_info - info about last session errors occurred.
- * @cs_timeout: CS timeout error last information.
- * @razwi: razwi last information.
+ * struct hl_error_info - holds information collected during an error.
+ * @cs_timeout: CS timeout error information.
+ * @razwi: razwi information.
  * @undef_opcode: undefined opcode information
  */
-struct last_error_session_info {
+struct hl_error_info {
        struct cs_timeout_info          cs_timeout;
        struct razwi_info               razwi;
        struct undefined_opcode_info    undef_opcode;
@@ -3111,7 +3111,7 @@ struct hl_reset_info {
  * @state_dump_specs: constants and dictionaries needed to dump system state.
  * @multi_cs_completion: array of multi-CS completion.
  * @clk_throttling: holds information about current/previous clock throttling events
- * @last_error: holds information about last session in which CS timeout or razwi error occurred.
+ * @captured_err_info: holds information about errors.
  * @reset_info: holds current device reset information.
  * @stream_master_qid_arr: pointer to array with QIDs of master streams.
  * @fw_major_version: major version of current loaded preboot.
@@ -3286,7 +3286,7 @@ struct hl_device {
        struct multi_cs_completion      multi_cs_completion[
                                                        MULTI_CS_MAX_USER_CTX];
        struct hl_clk_throttle          clk_throttling;
-       struct last_error_session_info  last_error;
+       struct hl_error_info            captured_err_info;
 
        struct hl_reset_info            reset_info;
 
index fd9c868..5250bfb 100644 (file)
@@ -211,9 +211,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
        hl_debugfs_add_file(hpriv);
 
-       atomic_set(&hdev->last_error.cs_timeout.write_enable, 1);
-       atomic_set(&hdev->last_error.razwi.write_enable, 1);
-       hdev->last_error.undef_opcode.write_enable = true;
+       atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
+       atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
+       hdev->captured_err_info.undef_opcode.write_enable = true;
 
        hdev->open_counter++;
        hdev->last_successful_open_jif = jiffies;
index c7bd000..ab0be08 100644 (file)
@@ -593,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
        if ((!max_size) || (!out))
                return -EINVAL;
 
-       info.seq = hdev->last_error.cs_timeout.seq;
-       info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp);
+       info.seq = hdev->captured_err_info.cs_timeout.seq;
+       info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp);
 
        return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
 }
@@ -609,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
        if ((!max_size) || (!out))
                return -EINVAL;
 
-       info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp);
-       info.addr = hdev->last_error.razwi.addr;
-       info.engine_id_1 = hdev->last_error.razwi.engine_id_1;
-       info.engine_id_2 = hdev->last_error.razwi.engine_id_2;
-       info.no_engine_id = hdev->last_error.razwi.non_engine_initiator;
-       info.error_type = hdev->last_error.razwi.type;
+       info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp);
+       info.addr = hdev->captured_err_info.razwi.addr;
+       info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1;
+       info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2;
+       info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator;
+       info.error_type = hdev->captured_err_info.razwi.type;
 
        return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
 }
@@ -629,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar
        if ((!max_size) || (!out))
                return -EINVAL;
 
-       info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp);
-       info.engine_id = hdev->last_error.undef_opcode.engine_id;
-       info.cq_addr = hdev->last_error.undef_opcode.cq_addr;
-       info.cq_size = hdev->last_error.undef_opcode.cq_size;
-       info.stream_id = hdev->last_error.undef_opcode.stream_id;
-       info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len;
-       memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams,
+       info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp);
+       info.engine_id = hdev->captured_err_info.undef_opcode.engine_id;
+       info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr;
+       info.cq_size = hdev->captured_err_info.undef_opcode.cq_size;
+       info.stream_id = hdev->captured_err_info.undef_opcode.stream_id;
+       info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len;
+       memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams,
                        sizeof(info.cb_addr_streams));
 
        return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
index 48ff3b1..f81a141 100644 (file)
@@ -6894,9 +6894,9 @@ static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 strea
                                                        stream, cq_ptr, size);
 
        if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-               hdev->last_error.undef_opcode.cq_addr = cq_ptr;
-               hdev->last_error.undef_opcode.cq_size = size;
-               hdev->last_error.undef_opcode.stream_id = stream;
+               hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
+               hdev->captured_err_info.undef_opcode.cq_size = size;
+               hdev->captured_err_info.undef_opcode.stream_id = stream;
        }
 }
 
@@ -6962,7 +6962,7 @@ static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
        }
 
        if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-               struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode;
+               struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
                u32 arr_idx = undef_opcode->cb_addr_streams_len;
 
                if (arr_idx == 0) {
@@ -7046,11 +7046,11 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
                }
                /* check for undefined opcode */
                if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
-                               hdev->last_error.undef_opcode.write_enable) {
-                       memset(&hdev->last_error.undef_opcode, 0,
-                                               sizeof(hdev->last_error.undef_opcode));
+                               hdev->captured_err_info.undef_opcode.write_enable) {
+                       memset(&hdev->captured_err_info.undef_opcode, 0,
+                                               sizeof(hdev->captured_err_info.undef_opcode));
 
-                       hdev->last_error.undef_opcode.write_enable = false;
+                       hdev->captured_err_info.undef_opcode.write_enable = false;
                        *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
                }
 
@@ -7332,18 +7332,19 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
                gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
 
                /* In case it's the first razwi, save its parameters*/
-               rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0);
+               rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0);
                if (rc) {
-                       hdev->last_error.razwi.timestamp = ktime_get();
-                       hdev->last_error.razwi.addr = razwi_addr;
-                       hdev->last_error.razwi.engine_id_1 = engine_id_1;
-                       hdev->last_error.razwi.engine_id_2 = engine_id_2;
+                       hdev->captured_err_info.razwi.timestamp = ktime_get();
+                       hdev->captured_err_info.razwi.addr = razwi_addr;
+                       hdev->captured_err_info.razwi.engine_id_1 = engine_id_1;
+                       hdev->captured_err_info.razwi.engine_id_2 = engine_id_2;
                        /*
                         * If first engine id holds non valid value the razwi initiator
                         * does not have engine id
                         */
-                       hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
-                       hdev->last_error.razwi.type = razwi_type;
+                       hdev->captured_err_info.razwi.non_engine_initiator =
+                                                                       (engine_id_1 == U16_MAX);
+                       hdev->captured_err_info.razwi.type = razwi_type;
 
                }
        }