Merge tag 'for-5.4-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
[platform/kernel/linux-rpi.git] / fs / afs / server.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS server record management
3  *
4  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13
14 static unsigned afs_server_gc_delay = 10;       /* Server record timeout in seconds */
15 static unsigned afs_server_update_delay = 30;   /* Time till VLDB recheck in secs */
16 static atomic_t afs_server_debug_id;
17
18 static void afs_inc_servers_outstanding(struct afs_net *net)
19 {
20         atomic_inc(&net->servers_outstanding);
21 }
22
23 static void afs_dec_servers_outstanding(struct afs_net *net)
24 {
25         if (atomic_dec_and_test(&net->servers_outstanding))
26                 wake_up_var(&net->servers_outstanding);
27 }
28
29 /*
30  * Find a server by one of its addresses.
31  */
32 struct afs_server *afs_find_server(struct afs_net *net,
33                                    const struct sockaddr_rxrpc *srx)
34 {
35         const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
36         const struct afs_addr_list *alist;
37         struct afs_server *server = NULL;
38         unsigned int i;
39         bool ipv6 = true;
40         int seq = 0, diff;
41
42         if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
43             srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
44             srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
45                 ipv6 = false;
46
47         rcu_read_lock();
48
49         do {
50                 if (server)
51                         afs_put_server(net, server, afs_server_trace_put_find_rsq);
52                 server = NULL;
53                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
54
55                 if (ipv6) {
56                         hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
57                                 alist = rcu_dereference(server->addresses);
58                                 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
59                                         b = &alist->addrs[i].transport.sin6;
60                                         diff = ((u16 __force)a->sin6_port -
61                                                 (u16 __force)b->sin6_port);
62                                         if (diff == 0)
63                                                 diff = memcmp(&a->sin6_addr,
64                                                               &b->sin6_addr,
65                                                               sizeof(struct in6_addr));
66                                         if (diff == 0)
67                                                 goto found;
68                                 }
69                         }
70                 } else {
71                         hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
72                                 alist = rcu_dereference(server->addresses);
73                                 for (i = 0; i < alist->nr_ipv4; i++) {
74                                         b = &alist->addrs[i].transport.sin6;
75                                         diff = ((u16 __force)a->sin6_port -
76                                                 (u16 __force)b->sin6_port);
77                                         if (diff == 0)
78                                                 diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
79                                                         (u32 __force)b->sin6_addr.s6_addr32[3]);
80                                         if (diff == 0)
81                                                 goto found;
82                                 }
83                         }
84                 }
85
86                 server = NULL;
87         found:
88                 if (server && !atomic_inc_not_zero(&server->usage))
89                         server = NULL;
90
91         } while (need_seqretry(&net->fs_addr_lock, seq));
92
93         done_seqretry(&net->fs_addr_lock, seq);
94
95         rcu_read_unlock();
96         return server;
97 }
98
99 /*
100  * Look up a server by its UUID
101  */
102 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
103 {
104         struct afs_server *server = NULL;
105         struct rb_node *p;
106         int diff, seq = 0;
107
108         _enter("%pU", uuid);
109
110         do {
111                 /* Unfortunately, rbtree walking doesn't give reliable results
112                  * under just the RCU read lock, so we have to check for
113                  * changes.
114                  */
115                 if (server)
116                         afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
117                 server = NULL;
118
119                 read_seqbegin_or_lock(&net->fs_lock, &seq);
120
121                 p = net->fs_servers.rb_node;
122                 while (p) {
123                         server = rb_entry(p, struct afs_server, uuid_rb);
124
125                         diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
126                         if (diff < 0) {
127                                 p = p->rb_left;
128                         } else if (diff > 0) {
129                                 p = p->rb_right;
130                         } else {
131                                 afs_get_server(server, afs_server_trace_get_by_uuid);
132                                 break;
133                         }
134
135                         server = NULL;
136                 }
137         } while (need_seqretry(&net->fs_lock, seq));
138
139         done_seqretry(&net->fs_lock, seq);
140
141         _leave(" = %p", server);
142         return server;
143 }
144
145 /*
146  * Install a server record in the namespace tree
147  */
148 static struct afs_server *afs_install_server(struct afs_net *net,
149                                              struct afs_server *candidate)
150 {
151         const struct afs_addr_list *alist;
152         struct afs_server *server;
153         struct rb_node **pp, *p;
154         int ret = -EEXIST, diff;
155
156         _enter("%p", candidate);
157
158         write_seqlock(&net->fs_lock);
159
160         /* Firstly install the server in the UUID lookup tree */
161         pp = &net->fs_servers.rb_node;
162         p = NULL;
163         while (*pp) {
164                 p = *pp;
165                 _debug("- consider %p", p);
166                 server = rb_entry(p, struct afs_server, uuid_rb);
167                 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
168                 if (diff < 0)
169                         pp = &(*pp)->rb_left;
170                 else if (diff > 0)
171                         pp = &(*pp)->rb_right;
172                 else
173                         goto exists;
174         }
175
176         server = candidate;
177         rb_link_node(&server->uuid_rb, p, pp);
178         rb_insert_color(&server->uuid_rb, &net->fs_servers);
179         hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
180
181         write_seqlock(&net->fs_addr_lock);
182         alist = rcu_dereference_protected(server->addresses,
183                                           lockdep_is_held(&net->fs_addr_lock.lock));
184
185         /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
186          * it in the IPv4 and/or IPv6 reverse-map lists.
187          *
188          * TODO: For speed we want to use something other than a flat list
189          * here; even sorting the list in terms of lowest address would help a
190          * bit, but anything we might want to do gets messy and memory
191          * intensive.
192          */
193         if (alist->nr_ipv4 > 0)
194                 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
195         if (alist->nr_addrs > alist->nr_ipv4)
196                 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
197
198         write_sequnlock(&net->fs_addr_lock);
199         ret = 0;
200
201 exists:
202         afs_get_server(server, afs_server_trace_get_install);
203         write_sequnlock(&net->fs_lock);
204         return server;
205 }
206
207 /*
208  * allocate a new server record
209  */
210 static struct afs_server *afs_alloc_server(struct afs_net *net,
211                                            const uuid_t *uuid,
212                                            struct afs_addr_list *alist)
213 {
214         struct afs_server *server;
215
216         _enter("");
217
218         server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
219         if (!server)
220                 goto enomem;
221
222         atomic_set(&server->usage, 1);
223         server->debug_id = atomic_inc_return(&afs_server_debug_id);
224         RCU_INIT_POINTER(server->addresses, alist);
225         server->addr_version = alist->version;
226         server->uuid = *uuid;
227         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
228         rwlock_init(&server->fs_lock);
229         INIT_HLIST_HEAD(&server->cb_volumes);
230         rwlock_init(&server->cb_break_lock);
231         init_waitqueue_head(&server->probe_wq);
232         spin_lock_init(&server->probe_lock);
233
234         afs_inc_servers_outstanding(net);
235         trace_afs_server(server, 1, afs_server_trace_alloc);
236         _leave(" = %p", server);
237         return server;
238
239 enomem:
240         _leave(" = NULL [nomem]");
241         return NULL;
242 }
243
244 /*
245  * Look up an address record for a server
246  */
247 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
248                                                  struct key *key, const uuid_t *uuid)
249 {
250         struct afs_vl_cursor vc;
251         struct afs_addr_list *alist = NULL;
252         int ret;
253
254         ret = -ERESTARTSYS;
255         if (afs_begin_vlserver_operation(&vc, cell, key)) {
256                 while (afs_select_vlserver(&vc)) {
257                         if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
258                                 alist = afs_yfsvl_get_endpoints(&vc, uuid);
259                         else
260                                 alist = afs_vl_get_addrs_u(&vc, uuid);
261                 }
262
263                 ret = afs_end_vlserver_operation(&vc);
264         }
265
266         return ret < 0 ? ERR_PTR(ret) : alist;
267 }
268
269 /*
270  * Get or create a fileserver record.
271  */
272 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
273                                      const uuid_t *uuid)
274 {
275         struct afs_addr_list *alist;
276         struct afs_server *server, *candidate;
277
278         _enter("%p,%pU", cell->net, uuid);
279
280         server = afs_find_server_by_uuid(cell->net, uuid);
281         if (server)
282                 return server;
283
284         alist = afs_vl_lookup_addrs(cell, key, uuid);
285         if (IS_ERR(alist))
286                 return ERR_CAST(alist);
287
288         candidate = afs_alloc_server(cell->net, uuid, alist);
289         if (!candidate) {
290                 afs_put_addrlist(alist);
291                 return ERR_PTR(-ENOMEM);
292         }
293
294         server = afs_install_server(cell->net, candidate);
295         if (server != candidate) {
296                 afs_put_addrlist(alist);
297                 kfree(candidate);
298         }
299
300         _leave(" = %p{%d}", server, atomic_read(&server->usage));
301         return server;
302 }
303
304 /*
305  * Set the server timer to fire after a given delay, assuming it's not already
306  * set for an earlier time.
307  */
308 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
309 {
310         if (net->live) {
311                 afs_inc_servers_outstanding(net);
312                 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
313                         afs_dec_servers_outstanding(net);
314         }
315 }
316
317 /*
318  * Server management timer.  We have an increment on fs_outstanding that we
319  * need to pass along to the work item.
320  */
321 void afs_servers_timer(struct timer_list *timer)
322 {
323         struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
324
325         _enter("");
326         if (!queue_work(afs_wq, &net->fs_manager))
327                 afs_dec_servers_outstanding(net);
328 }
329
330 /*
331  * Get a reference on a server object.
332  */
333 struct afs_server *afs_get_server(struct afs_server *server,
334                                   enum afs_server_trace reason)
335 {
336         unsigned int u = atomic_inc_return(&server->usage);
337
338         trace_afs_server(server, u, reason);
339         return server;
340 }
341
342 /*
343  * Release a reference on a server record.
344  */
345 void afs_put_server(struct afs_net *net, struct afs_server *server,
346                     enum afs_server_trace reason)
347 {
348         unsigned int usage;
349
350         if (!server)
351                 return;
352
353         server->put_time = ktime_get_real_seconds();
354
355         usage = atomic_dec_return(&server->usage);
356
357         trace_afs_server(server, usage, reason);
358
359         if (likely(usage > 0))
360                 return;
361
362         afs_set_server_timer(net, afs_server_gc_delay);
363 }
364
365 static void afs_server_rcu(struct rcu_head *rcu)
366 {
367         struct afs_server *server = container_of(rcu, struct afs_server, rcu);
368
369         trace_afs_server(server, atomic_read(&server->usage),
370                          afs_server_trace_free);
371         afs_put_addrlist(rcu_access_pointer(server->addresses));
372         kfree(server);
373 }
374
375 /*
376  * destroy a dead server
377  */
378 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
379 {
380         struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
381         struct afs_addr_cursor ac = {
382                 .alist  = alist,
383                 .index  = alist->preferred,
384                 .error  = 0,
385         };
386
387         trace_afs_server(server, atomic_read(&server->usage),
388                          afs_server_trace_give_up_cb);
389
390         if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
391                 afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
392
393         wait_var_event(&server->probe_outstanding,
394                        atomic_read(&server->probe_outstanding) == 0);
395
396         trace_afs_server(server, atomic_read(&server->usage),
397                          afs_server_trace_destroy);
398         call_rcu(&server->rcu, afs_server_rcu);
399         afs_dec_servers_outstanding(net);
400 }
401
402 /*
403  * Garbage collect any expired servers.
404  */
405 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
406 {
407         struct afs_server *server;
408         bool deleted;
409         int usage;
410
411         while ((server = gc_list)) {
412                 gc_list = server->gc_next;
413
414                 write_seqlock(&net->fs_lock);
415                 usage = 1;
416                 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
417                 trace_afs_server(server, usage, afs_server_trace_gc);
418                 if (deleted) {
419                         rb_erase(&server->uuid_rb, &net->fs_servers);
420                         hlist_del_rcu(&server->proc_link);
421                 }
422                 write_sequnlock(&net->fs_lock);
423
424                 if (deleted) {
425                         write_seqlock(&net->fs_addr_lock);
426                         if (!hlist_unhashed(&server->addr4_link))
427                                 hlist_del_rcu(&server->addr4_link);
428                         if (!hlist_unhashed(&server->addr6_link))
429                                 hlist_del_rcu(&server->addr6_link);
430                         write_sequnlock(&net->fs_addr_lock);
431                         afs_destroy_server(net, server);
432                 }
433         }
434 }
435
436 /*
437  * Manage the records of servers known to be within a network namespace.  This
438  * includes garbage collecting unused servers.
439  *
440  * Note also that we were given an increment on net->servers_outstanding by
441  * whoever queued us that we need to deal with before returning.
442  */
443 void afs_manage_servers(struct work_struct *work)
444 {
445         struct afs_net *net = container_of(work, struct afs_net, fs_manager);
446         struct afs_server *gc_list = NULL;
447         struct rb_node *cursor;
448         time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
449         bool purging = !net->live;
450
451         _enter("");
452
453         /* Trawl the server list looking for servers that have expired from
454          * lack of use.
455          */
456         read_seqlock_excl(&net->fs_lock);
457
458         for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
459                 struct afs_server *server =
460                         rb_entry(cursor, struct afs_server, uuid_rb);
461                 int usage = atomic_read(&server->usage);
462
463                 _debug("manage %pU %u", &server->uuid, usage);
464
465                 ASSERTCMP(usage, >=, 1);
466                 ASSERTIFCMP(purging, usage, ==, 1);
467
468                 if (usage == 1) {
469                         time64_t expire_at = server->put_time;
470
471                         if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
472                             !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
473                                 expire_at += afs_server_gc_delay;
474                         if (purging || expire_at <= now) {
475                                 server->gc_next = gc_list;
476                                 gc_list = server;
477                         } else if (expire_at < next_manage) {
478                                 next_manage = expire_at;
479                         }
480                 }
481         }
482
483         read_sequnlock_excl(&net->fs_lock);
484
485         /* Update the timer on the way out.  We have to pass an increment on
486          * servers_outstanding in the namespace that we are in to the timer or
487          * the work scheduler.
488          */
489         if (!purging && next_manage < TIME64_MAX) {
490                 now = ktime_get_real_seconds();
491
492                 if (next_manage - now <= 0) {
493                         if (queue_work(afs_wq, &net->fs_manager))
494                                 afs_inc_servers_outstanding(net);
495                 } else {
496                         afs_set_server_timer(net, next_manage - now);
497                 }
498         }
499
500         afs_gc_servers(net, gc_list);
501
502         afs_dec_servers_outstanding(net);
503         _leave(" [%d]", atomic_read(&net->servers_outstanding));
504 }
505
506 static void afs_queue_server_manager(struct afs_net *net)
507 {
508         afs_inc_servers_outstanding(net);
509         if (!queue_work(afs_wq, &net->fs_manager))
510                 afs_dec_servers_outstanding(net);
511 }
512
513 /*
514  * Purge list of servers.
515  */
516 void afs_purge_servers(struct afs_net *net)
517 {
518         _enter("");
519
520         if (del_timer_sync(&net->fs_timer))
521                 atomic_dec(&net->servers_outstanding);
522
523         afs_queue_server_manager(net);
524
525         _debug("wait");
526         wait_var_event(&net->servers_outstanding,
527                        !atomic_read(&net->servers_outstanding));
528         _leave("");
529 }
530
531 /*
532  * Get an update for a server's address list.
533  */
534 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
535 {
536         struct afs_addr_list *alist, *discard;
537
538         _enter("");
539
540         trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);
541
542         alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
543                                     &server->uuid);
544         if (IS_ERR(alist)) {
545                 if ((PTR_ERR(alist) == -ERESTARTSYS ||
546                      PTR_ERR(alist) == -EINTR) &&
547                     !(fc->flags & AFS_FS_CURSOR_INTR) &&
548                     server->addresses) {
549                         _leave(" = t [intr]");
550                         return true;
551                 }
552                 fc->error = PTR_ERR(alist);
553                 _leave(" = f [%d]", fc->error);
554                 return false;
555         }
556
557         discard = alist;
558         if (server->addr_version != alist->version) {
559                 write_lock(&server->fs_lock);
560                 discard = rcu_dereference_protected(server->addresses,
561                                                     lockdep_is_held(&server->fs_lock));
562                 rcu_assign_pointer(server->addresses, alist);
563                 server->addr_version = alist->version;
564                 write_unlock(&server->fs_lock);
565         }
566
567         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
568         afs_put_addrlist(discard);
569         _leave(" = t");
570         return true;
571 }
572
573 /*
574  * See if a server's address list needs updating.
575  */
576 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
577 {
578         time64_t now = ktime_get_real_seconds();
579         long diff;
580         bool success;
581         int ret, retries = 0;
582
583         _enter("");
584
585         ASSERT(server);
586
587 retry:
588         diff = READ_ONCE(server->update_at) - now;
589         if (diff > 0) {
590                 _leave(" = t [not now %ld]", diff);
591                 return true;
592         }
593
594         if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
595                 success = afs_update_server_record(fc, server);
596                 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
597                 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
598                 _leave(" = %d", success);
599                 return success;
600         }
601
602         ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
603                           TASK_INTERRUPTIBLE);
604         if (ret == -ERESTARTSYS) {
605                 if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
606                         _leave(" = t [intr]");
607                         return true;
608                 }
609                 fc->error = ret;
610                 _leave(" = f [intr]");
611                 return false;
612         }
613
614         retries++;
615         if (retries == 4) {
616                 _leave(" = f [stale]");
617                 ret = -ESTALE;
618                 return false;
619         }
620         goto retry;
621 }