staging: lustre: ksocklnd: ignore timedout TX on closing connection
author Liang Zhen <liang.zhen@intel.com>
Sun, 29 Jan 2017 00:05:16 +0000 (19:05 -0500)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Feb 2017 12:01:38 +0000 (13:01 +0100)
The ksocklnd reaper thread always tries to close the connection for the
first timed-out zero-copy TX. This is wrong if that connection is
already being closed, because the reaper will then see the same TX again
and again, and will never find the other timed-out zero-copy TXs or
close the connections for them.

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-8867
Reviewed-on: https://review.whamcloud.com/23973
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c

index 6da9f31..cbac884 100644 (file)
@@ -2456,6 +2456,7 @@ ksocknal_check_peer_timeouts(int idx)
 
        list_for_each_entry(peer, peers, ksnp_list) {
                unsigned long deadline = 0;
+               struct ksock_tx *tx_stale;
                int resid = 0;
                int n = 0;
 
@@ -2503,6 +2504,7 @@ ksocknal_check_peer_timeouts(int idx)
                if (list_empty(&peer->ksnp_zc_req_list))
                        continue;
 
+               tx_stale = NULL;
                spin_lock(&peer->ksnp_lock);
                list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
                        if (!cfs_time_aftereq(cfs_time_current(),
@@ -2511,26 +2513,26 @@ ksocknal_check_peer_timeouts(int idx)
                        /* ignore the TX if connection is being closed */
                        if (tx->tx_conn->ksnc_closing)
                                continue;
+                       if (!tx_stale)
+                               tx_stale = tx;
                        n++;
                }
 
-               if (!n) {
+               if (!tx_stale) {
                        spin_unlock(&peer->ksnp_lock);
                        continue;
                }
 
-               tx = list_entry(peer->ksnp_zc_req_list.next,
-                               struct ksock_tx, tx_zc_list);
-               deadline = tx->tx_deadline;
-               resid = tx->tx_resid;
-               conn = tx->tx_conn;
+               deadline = tx_stale->tx_deadline;
+               resid = tx_stale->tx_resid;
+               conn = tx_stale->tx_conn;
                ksocknal_conn_addref(conn);
 
                spin_unlock(&peer->ksnp_lock);
                read_unlock(&ksocknal_data.ksnd_global_lock);
 
                CERROR("Total %d stale ZC_REQs for peer %s detected; the oldest(%p) timed out %ld secs ago, resid: %d, wmem: %d\n",
-                      n, libcfs_nid2str(peer->ksnp_id.nid), tx,
+                      n, libcfs_nid2str(peer->ksnp_id.nid), tx_stale,
                       cfs_duration_sec(cfs_time_current() - deadline),
                       resid, conn->ksnc_sock->sk->sk_wmem_queued);