IB/hfi1: Fix infinite loop in 8051 command error path
authorSebastian Sanchez <sebastian.sanchez@intel.com>
Tue, 19 Dec 2017 03:56:59 +0000 (19:56 -0800)
committerDoug Ledford <dledford@redhat.com>
Fri, 5 Jan 2018 18:34:55 +0000 (13:34 -0500)
When an 8051 command times out, the entire DC block is restarted. During
the restart, the host interface version bit is set, which calls
do_8051_command() recursively. The host version bit needs to be set
before the link moves into polling, so the host version bit can be set
in set_local_link_attributes() instead. Thus, the 8051 command functions
can be simplied as a non-locking version (dd->dc8051_lock) of those
functions are no longer needed.

Fixes: 9be6a5d788b0 ("IB/hfi1: Prevent LNI out of sync by resetting host interface version")
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/firmware.c

index 4f057e8..ef0939c 100644 (file)
@@ -6518,11 +6518,12 @@ static void _dc_start(struct hfi1_devdata *dd)
        if (!dd->dc_shutdown)
                return;
 
-       /*
-        * Take the 8051 out of reset, wait until 8051 is ready, and set host
-        * version bit.
-        */
-       release_and_wait_ready_8051_firmware(dd);
+       /* Take the 8051 out of reset */
+       write_csr(dd, DC_DC8051_CFG_RST, 0ull);
+       /* Wait until 8051 is ready */
+       if (wait_fm_ready(dd, TIMEOUT_8051_START))
+               dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
+                          __func__);
 
        /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
        write_csr(dd, DCC_CFG_RESET, 0x10);
@@ -8564,23 +8565,27 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
 }
 
 /*
- * If the 8051 is in reset mode (dd->dc_shutdown == 1), this function
- * will still continue executing.
- *
  * Returns:
  *     < 0 = Linux error, not able to get access
  *     > 0 = 8051 command RETURN_CODE
  */
-static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
-                           u64 *out_data)
+static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
+                          u64 *out_data)
 {
        u64 reg, completed;
        int return_code;
        unsigned long timeout;
 
-       lockdep_assert_held(&dd->dc8051_lock);
        hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
 
+       mutex_lock(&dd->dc8051_lock);
+
+       /* We can't send any commands to the 8051 if it's in reset */
+       if (dd->dc_shutdown) {
+               return_code = -ENODEV;
+               goto fail;
+       }
+
        /*
         * If an 8051 host command timed out previously, then the 8051 is
         * stuck.
@@ -8681,29 +8686,6 @@ static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
        write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
 
 fail:
-       return return_code;
-}
-
-/*
- * Returns:
- *     < 0 = Linux error, not able to get access
- *     > 0 = 8051 command RETURN_CODE
- */
-static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
-                          u64 *out_data)
-{
-       int return_code;
-
-       mutex_lock(&dd->dc8051_lock);
-       /* We can't send any commands to the 8051 if it's in reset */
-       if (dd->dc_shutdown) {
-               return_code = -ENODEV;
-               goto fail;
-       }
-
-       return_code = _do_8051_command(dd, type, in_data, out_data);
-
-fail:
        mutex_unlock(&dd->dc8051_lock);
        return return_code;
 }
@@ -8713,17 +8695,16 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
        return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
 }
 
-static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id,
-                            u8 lane_id, u32 config_data)
+int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
+                    u8 lane_id, u32 config_data)
 {
        u64 data;
        int ret;
 
-       lockdep_assert_held(&dd->dc8051_lock);
        data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
                | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
                | (u64)config_data << LOAD_DATA_DATA_SHIFT;
-       ret = _do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
+       ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
        if (ret != HCMD_SUCCESS) {
                dd_dev_err(dd,
                           "load 8051 config: field id %d, lane %d, err %d\n",
@@ -8732,18 +8713,6 @@ static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id,
        return ret;
 }
 
-int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
-                    u8 lane_id, u32 config_data)
-{
-       int return_code;
-
-       mutex_lock(&dd->dc8051_lock);
-       return_code = _load_8051_config(dd, field_id, lane_id, config_data);
-       mutex_unlock(&dd->dc8051_lock);
-
-       return return_code;
-}
-
 /*
  * Read the 8051 firmware "registers".  Use the RAM directly.  Always
  * set the result, even on error.
@@ -8859,14 +8828,13 @@ int write_host_interface_version(struct hfi1_devdata *dd, u8 version)
        u32 frame;
        u32 mask;
 
-       lockdep_assert_held(&dd->dc8051_lock);
        mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT);
        read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame);
        /* Clear, then set field */
        frame &= ~mask;
        frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT);
-       return _load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
-                                frame);
+       return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
+                               frame);
 }
 
 void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
@@ -9270,6 +9238,14 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
        if (ret != HCMD_SUCCESS)
                goto set_local_link_attributes_fail;
 
+       ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
+       if (ret != HCMD_SUCCESS) {
+               dd_dev_err(dd,
+                          "Failed to set host interface version, return 0x%x\n",
+                          ret);
+               goto set_local_link_attributes_fail;
+       }
+
        /*
         * DC supports continuous updates.
         */
index 133e313..21fca8e 100644 (file)
 #define DOWN_REMOTE_REASON_SHIFT 16
 #define DOWN_REMOTE_REASON_MASK  0xff
 
+#define HOST_INTERFACE_VERSION 1
 #define HOST_INTERFACE_VERSION_SHIFT 16
 #define HOST_INTERFACE_VERSION_MASK  0xff
 
@@ -713,7 +714,6 @@ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
                      u8 *ver_patch);
 int write_host_interface_version(struct hfi1_devdata *dd, u8 version);
 void read_guid(struct hfi1_devdata *dd);
-int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd);
 int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
 void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
                          u8 neigh_reason, u8 rem_reason);
index 98868df..2b57ba7 100644 (file)
@@ -68,7 +68,6 @@
 #define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw"
 #define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw"
 #define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw"
-#define HOST_INTERFACE_VERSION 1
 
 MODULE_FIRMWARE(DEFAULT_FW_8051_NAME_ASIC);
 MODULE_FIRMWARE(DEFAULT_FW_FABRIC_NAME);
@@ -976,46 +975,6 @@ int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout)
 }
 
 /*
- * Clear all reset bits, releasing the 8051.
- * Wait for firmware to be ready to accept host requests.
- * Then, set host version bit.
- *
- * This function executes even if the 8051 is in reset mode when
- * dd->dc_shutdown == 1.
- *
- * Expects dd->dc8051_lock to be held.
- */
-int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd)
-{
-       int ret;
-
-       lockdep_assert_held(&dd->dc8051_lock);
-       /* clear all reset bits, releasing the 8051 */
-       write_csr(dd, DC_DC8051_CFG_RST, 0ull);
-
-       /*
-        * Wait for firmware to be ready to accept host
-        * requests.
-        */
-       ret = wait_fm_ready(dd, TIMEOUT_8051_START);
-       if (ret) {
-               dd_dev_err(dd, "8051 start timeout, current FW state 0x%x\n",
-                          get_firmware_state(dd));
-               return ret;
-       }
-
-       ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
-       if (ret != HCMD_SUCCESS) {
-               dd_dev_err(dd,
-                          "Failed to set host interface version, return 0x%x\n",
-                          ret);
-               return -EIO;
-       }
-
-       return 0;
-}
-
-/*
  * Load the 8051 firmware.
  */
 static int load_8051_firmware(struct hfi1_devdata *dd,
@@ -1080,22 +1039,31 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
        if (ret)
                return ret;
 
+       /* clear all reset bits, releasing the 8051 */
+       write_csr(dd, DC_DC8051_CFG_RST, 0ull);
+
        /*
-        * Clear all reset bits, releasing the 8051.
         * DC reset step 5. Wait for firmware to be ready to accept host
         * requests.
-        * Then, set host version bit.
         */
-       mutex_lock(&dd->dc8051_lock);
-       ret = release_and_wait_ready_8051_firmware(dd);
-       mutex_unlock(&dd->dc8051_lock);
-       if (ret)
-               return ret;
+       ret = wait_fm_ready(dd, TIMEOUT_8051_START);
+       if (ret) { /* timed out */
+               dd_dev_err(dd, "8051 start timeout, current state 0x%x\n",
+                          get_firmware_state(dd));
+               return -ETIMEDOUT;
+       }
 
        read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
        dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
                    (int)ver_major, (int)ver_minor, (int)ver_patch);
        dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
+       ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
+       if (ret != HCMD_SUCCESS) {
+               dd_dev_err(dd,
+                          "Failed to set host interface version, return 0x%x\n",
+                          ret);
+               return -EIO;
+       }
 
        return 0;
 }