ibmvnic: Harden device login requests
author Thomas Falcon <tlfalcon@linux.ibm.com>
Mon, 15 Jun 2020 15:29:23 +0000 (10:29 -0500)
committer David S. Miller <davem@davemloft.net>
Mon, 15 Jun 2020 20:18:13 +0000 (13:18 -0700)
The VNIC driver's "login" command sequence is the final step
in the driver's initialization process with device firmware,
confirming the available device queue resources to be utilized
by the driver. Under high system load, firmware may not respond
to the request in a timely manner or may abort the request. In
such cases, the driver should reattempt the login command
sequence. In case of a device error, the number of retries
is bounded.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/ibm/ibmvnic.c

index 1b4d04e..2baf7b3 100644 (file)
@@ -842,12 +842,13 @@ static int ibmvnic_login(struct net_device *netdev)
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
        unsigned long timeout = msecs_to_jiffies(30000);
        int retry_count = 0;
+       int retries = 10;
        bool retry;
        int rc;
 
        do {
                retry = false;
-               if (retry_count > IBMVNIC_MAX_QUEUES) {
+               if (retry_count > retries) {
                        netdev_warn(netdev, "Login attempts exceeded\n");
                        return -1;
                }
@@ -862,11 +863,23 @@ static int ibmvnic_login(struct net_device *netdev)
 
                if (!wait_for_completion_timeout(&adapter->init_done,
                                                 timeout)) {
-                       netdev_warn(netdev, "Login timed out\n");
-                       return -1;
+                       netdev_warn(netdev, "Login timed out, retrying...\n");
+                       retry = true;
+                       adapter->init_done_rc = 0;
+                       retry_count++;
+                       continue;
                }
 
-               if (adapter->init_done_rc == PARTIALSUCCESS) {
+               if (adapter->init_done_rc == ABORTED) {
+                       netdev_warn(netdev, "Login aborted, retrying...\n");
+                       retry = true;
+                       adapter->init_done_rc = 0;
+                       retry_count++;
+                       /* FW or device may be busy, so
+                        * wait a bit before retrying login
+                        */
+                       msleep(500);
+               } else if (adapter->init_done_rc == PARTIALSUCCESS) {
                        retry_count++;
                        release_sub_crqs(adapter, 1);