afs: Don't use VL probe running state to make decisions outside probe code
authorDavid Howells <dhowells@redhat.com>
Wed, 19 Aug 2020 14:27:17 +0000 (15:27 +0100)
committerDavid Howells <dhowells@redhat.com>
Thu, 20 Aug 2020 17:21:28 +0000 (18:21 +0100)
Don't use the running state for VL server probes to make decisions about
which server to use as the state is cleared at the start of a probe and
intermediate values might also be misleading.

Instead, add a separate 'latest known' rtt in the afs_vlserver struct and a
flag to indicate if the server is known to be responding and update these
as and when we know what to change them to.

Fixes: 3bf0fb6f33dd ("afs: Probe multiple fileservers simultaneously")
Signed-off-by: David Howells <dhowells@redhat.com>
fs/afs/internal.h
fs/afs/proc.c
fs/afs/vl_list.c
fs/afs/vl_probe.c
fs/afs/vl_rotate.c

index b29dfcf..18042b7 100644 (file)
@@ -401,15 +401,17 @@ struct afs_vlserver {
 #define AFS_VLSERVER_FL_PROBED 0               /* The VL server has been probed */
 #define AFS_VLSERVER_FL_PROBING        1               /* VL server is being probed */
 #define AFS_VLSERVER_FL_IS_YFS 2               /* Server is YFS not AFS */
+#define AFS_VLSERVER_FL_RESPONDING 3           /* VL server is responding */
        rwlock_t                lock;           /* Lock on addresses */
        atomic_t                usage;
+       unsigned int            rtt;            /* Server's current RTT in uS */
 
        /* Probe state */
        wait_queue_head_t       probe_wq;
        atomic_t                probe_outstanding;
        spinlock_t              probe_lock;
        struct {
-               unsigned int    rtt;            /* RTT as ktime/64 */
+               unsigned int    rtt;            /* RTT in uS */
                u32             abort_code;
                short           error;
                unsigned short  flags;
index 5e83715..e8babb6 100644 (file)
@@ -310,7 +310,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
                                   alist->preferred == i ? '>' : '-',
                                   &alist->addrs[i].transport);
        }
-       seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->probe.rtt);
+       seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt);
        seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n",
                   vlserver->probe.flags, vlserver->probe.error,
                   vlserver->probe.abort_code,
index 8fea54e..38b2ba1 100644 (file)
@@ -21,6 +21,7 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
                rwlock_init(&vlserver->lock);
                init_waitqueue_head(&vlserver->probe_wq);
                spin_lock_init(&vlserver->probe_lock);
+               vlserver->rtt = UINT_MAX;
                vlserver->name_len = name_len;
                vlserver->port = port;
                memcpy(vlserver->name, name, name_len);
index a6d04b4..d1c7068 100644 (file)
 #include "internal.h"
 #include "protocol_yfs.h"
 
-static bool afs_vl_probe_done(struct afs_vlserver *server)
+
+/*
+ * Handle the completion of a set of probes.
+ */
+static void afs_finished_vl_probe(struct afs_vlserver *server)
 {
-       if (!atomic_dec_and_test(&server->probe_outstanding))
-               return false;
+       if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) {
+               server->rtt = UINT_MAX;
+               clear_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags);
+       }
 
-       wake_up_var(&server->probe_outstanding);
        clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags);
        wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING);
-       return true;
+}
+
+/*
+ * Handle the completion of a probe RPC call.
+ */
+static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up)
+{
+       if (atomic_dec_and_test(&server->probe_outstanding)) {
+               afs_finished_vl_probe(server);
+               wake_up = true;
+       }
+
+       if (wake_up)
+               wake_up_all(&server->probe_wq);
 }
 
 /*
@@ -52,8 +70,13 @@ void afs_vlserver_probe_result(struct afs_call *call)
                goto responded;
        case -ENOMEM:
        case -ENONET:
+       case -EKEYEXPIRED:
+       case -EKEYREVOKED:
+       case -EKEYREJECTED:
                server->probe.flags |= AFS_VLSERVER_PROBE_LOCAL_FAILURE;
-               afs_io_error(call, afs_io_error_vl_probe_fail);
+               if (server->probe.error == 0)
+                       server->probe.error = ret;
+               trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail);
                goto out;
        case -ECONNRESET: /* Responded, but call expired. */
        case -ERFKILL:
@@ -72,7 +95,7 @@ void afs_vlserver_probe_result(struct afs_call *call)
                     server->probe.error == -ETIMEDOUT ||
                     server->probe.error == -ETIME))
                        server->probe.error = ret;
-               afs_io_error(call, afs_io_error_vl_probe_fail);
+               trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail);
                goto out;
        }
 
@@ -95,22 +118,22 @@ responded:
        if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
            rtt_us < server->probe.rtt) {
                server->probe.rtt = rtt_us;
+               server->rtt = rtt_us;
                alist->preferred = index;
-               have_result = true;
        }
 
        smp_wmb(); /* Set rtt before responded. */
        server->probe.flags |= AFS_VLSERVER_PROBE_RESPONDED;
        set_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+       set_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags);
+       have_result = true;
 out:
        spin_unlock(&server->probe_lock);
 
        _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
               server_index, index, &alist->addrs[index].transport, rtt_us, ret);
 
-       have_result |= afs_vl_probe_done(server);
-       if (have_result)
-               wake_up_all(&server->probe_wq);
+       afs_done_one_vl_probe(server, have_result);
 }
 
 /*
@@ -148,11 +171,10 @@ static bool afs_do_probe_vlserver(struct afs_net *net,
                        in_progress = true;
                } else {
                        afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code);
+                       afs_done_one_vl_probe(server, false);
                }
        }
 
-       if (!in_progress)
-               afs_vl_probe_done(server);
        return in_progress;
 }
 
@@ -190,7 +212,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
 {
        struct wait_queue_entry *waits;
        struct afs_vlserver *server;
-       unsigned int rtt = UINT_MAX;
+       unsigned int rtt = UINT_MAX, rtt_s;
        bool have_responders = false;
        int pref = -1, i;
 
@@ -246,10 +268,11 @@ stop:
        for (i = 0; i < vllist->nr_servers; i++) {
                if (test_bit(i, &untried)) {
                        server = vllist->servers[i].server;
-                       if ((server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) &&
-                           server->probe.rtt < rtt) {
+                       rtt_s = READ_ONCE(server->rtt);
+                       if (test_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags) &&
+                           rtt_s < rtt) {
                                pref = i;
-                               rtt = server->probe.rtt;
+                               rtt = rtt_s;
                        }
 
                        remove_wait_queue(&server->probe_wq, &waits[i]);
index ed2609e..ab2beb4 100644 (file)
@@ -193,7 +193,7 @@ pick_server:
                struct afs_vlserver *s = vc->server_list->servers[i].server;
 
                if (!test_bit(i, &vc->untried) ||
-                   !(s->probe.flags & AFS_VLSERVER_PROBE_RESPONDED))
+                   !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
                        continue;
                if (s->probe.rtt < rtt) {
                        vc->index = i;