4 * Copyright (c) 2016, Ericsson AB
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
36 #include <net/genetlink.h>
/* Capacity of the members[] array in a domain record; also the upper limit
 * returned by dom_size().
 */
42 #define MAX_MON_DOMAIN 64
/* Base period of the monitor timer; it is fed to msecs_to_jiffies(), so the
 * unit is milliseconds.
 */
43 #define MON_TIMEOUT 120000
/* Once a peer's down_cnt reaches this value, tipc_mon_get_state() sets
 * state->reset.
 */
44 #define MAX_PEER_DOWN_EVENTS 4
46 /* struct tipc_mon_domain: domain record to be transferred between peers
47 * @len: actual size of domain record
48 * @gen: current generation of sender's domain
49 * @ack_gen: most recent generation of self's domain acked by peer
50 * @member_cnt: number of domain member nodes described in this record
51 * @up_map: bit map indicating which of the members the sender considers up
52 * @members: identity of the domain members
54 struct tipc_mon_domain {
/* Fixed-capacity array; readers only walk the first member_cnt entries */
60 u32 members[MAX_MON_DOMAIN];
63 /* struct tipc_peer: state of a peer node and its domain
64 * @addr: tipc node identity of peer
65 * @head_map: shows which other nodes currently consider peer 'up'
66 * @domain: most recent domain record from peer
67 * @hash: position in hashed lookup list
68 * @list: position in linked list, in circular ascending order by 'addr'
69 * @applied: number of reported domain members applied on this monitor list
70 * @is_up: peer is up as seen from this node
71 * @is_head: peer is assigned domain head as seen from this node
72 * @is_local: peer is in local domain and should be continuously monitored
73 * @down_cnt: - number of other peers which have reported this one lost
/* Domain record held for this peer; users test it for NULL before use */
77 struct tipc_mon_domain *domain;
/* Linkage in the hash buckets searched by get_peer() */
78 struct hlist_node hash;
/* Linkage in the circular peer list walked by peer_prev()/peer_nxt() */
79 struct list_head list;
/* Hash buckets of known peers, indexed by tipc_hashfn(addr) */
88 struct hlist_head peers[NODE_HTABLE_SIZE];
/* The monitor's own peer entry; anchor of the circular peer list */
90 struct tipc_peer *self;
/* Copy of the local domain record with its fields converted to network byte
 * order; tipc_mon_prep() copies it out verbatim.
 */
92 struct tipc_mon_domain cache;
/* Timer whose handler is mon_timeout() */
96 struct timer_list timer;
/* Timer period, in jiffies */
97 unsigned long timer_intv;
100 static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
102 return tipc_net(net)->monitors[bearer_id];
/* Size in bytes of a complete struct tipc_mon_domain, i.e. of a domain record
 * with all MAX_MON_DOMAIN member slots present. Has external linkage.
 */
105 const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
107 /* dom_rec_len(): actual length of domain record for transport
109 static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
111 return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
114 /* dom_size() : calculate size of own domain based on number of peers
116 static int dom_size(int peers)
120 while ((i * i) < peers)
122 return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
125 static void map_set(u64 *up_map, int i, unsigned int v)
127 *up_map &= ~(1ULL << i);
128 *up_map |= ((u64)v << i);
131 static int map_get(u64 up_map, int i)
133 return (up_map & (1 << i)) >> i;
136 static struct tipc_peer *peer_prev(struct tipc_peer *peer)
138 return list_last_entry(&peer->list, struct tipc_peer, list);
141 static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
143 return list_first_entry(&peer->list, struct tipc_peer, list);
146 static struct tipc_peer *peer_head(struct tipc_peer *peer)
148 while (!peer->is_head)
149 peer = peer_prev(peer);
153 static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
155 struct tipc_peer *peer;
156 unsigned int thash = tipc_hashfn(addr);
158 hlist_for_each_entry(peer, &mon->peers[thash], hash) {
159 if (peer->addr == addr)
165 static struct tipc_peer *get_self(struct net *net, int bearer_id)
167 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
172 static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
174 struct tipc_net *tn = tipc_net(net);
176 return mon->peer_cnt > tn->mon_threshold;
179 /* mon_identify_lost_members() : - identify and mark potentially lost members
/* Compares the peer's previous domain record (dom_bef) with the one now
 * stored on the peer, walking the applied_bef list successors of the peer.
 */
181 static void mon_identify_lost_members(struct tipc_peer *peer,
182 struct tipc_mon_domain *dom_bef,
/* Iteration starts at the peer itself; the loop steps to the successor first */
185 struct tipc_peer *member = peer;
/* "After" state: the record and applied count currently held by the peer */
186 struct tipc_mon_domain *dom_aft = peer->domain;
187 int applied_aft = peer->applied;
190 for (i = 0; i < applied_bef; i++) {
191 member = peer_nxt(member);
193 /* Do nothing if self or peer already see member as down */
194 if (!member->is_up || !map_get(dom_bef->up_map, i))
197 /* Loss of local node must be detected by active probing */
198 if (member->is_local)
201 /* Start probing if member was removed from applied domain */
202 if (!applied_aft || (applied_aft < i)) {
/* A non-zero down_cnt makes tipc_mon_get_state() report probing */
203 member->down_cnt = 1;
207 /* Member loss is confirmed if it is still in applied domain */
208 if (!map_get(dom_aft->up_map, i))
213 /* mon_apply_domain() : match a peer's domain record against monitor list
/* Walks the peer's domain record and the monitor list in parallel, comparing
 * each recorded member address with the corresponding list successor.
 */
215 static void mon_apply_domain(struct tipc_monitor *mon,
216 struct tipc_peer *peer)
218 struct tipc_mon_domain *dom = peer->domain;
219 struct tipc_peer *member;
/* Peers without a domain record, or that are not up, are tested for here */
223 if (!dom || !peer->is_up)
226 /* Scan across domain members and match against monitor list */
228 member = peer_nxt(peer);
229 for (i = 0; i < dom->member_cnt; i++) {
230 addr = dom->members[i];
/* Detect the first position where record and list disagree */
231 if (addr != member->addr)
234 member = peer_nxt(member);
238 /* mon_update_local_domain() : update after peer addition/removal/up/down
/* Rebuilds this node's own domain record from the list successors of self and
 * mirrors it, in network byte order, into mon->cache.
 */
240 static void mon_update_local_domain(struct tipc_monitor *mon)
242 struct tipc_peer *self = mon->self;
243 struct tipc_mon_domain *cache = &mon->cache;
244 struct tipc_mon_domain *dom = self->domain;
245 struct tipc_peer *peer = self;
/* Snapshot of the old up-map, compared after the rebuild */
246 u64 prev_up_map = dom->up_map;
250 /* Update local domain size based on current size of cluster */
/* NOTE(review): the -1 presumably excludes self from the member count - confirm */
251 member_cnt = dom_size(mon->peer_cnt) - 1;
252 self->applied = member_cnt;
254 /* Update native and cached outgoing local domain records */
255 dom->len = dom_rec_len(dom, member_cnt);
/* diff collects whether member count, member identities or up-map changed */
256 diff = dom->member_cnt != member_cnt;
257 dom->member_cnt = member_cnt;
258 for (i = 0; i < member_cnt; i++) {
259 peer = peer_nxt(peer);
260 diff |= dom->members[i] != peer->addr;
261 dom->members[i] = peer->addr;
262 map_set(&dom->up_map, i, peer->is_up);
/* Cached copy holds addresses in network byte order */
263 cache->members[i] = htonl(peer->addr);
265 diff |= dom->up_map != prev_up_map;
/* Advance the domain generation and publish the header fields to the cache */
268 dom->gen = ++mon->dom_gen;
269 cache->len = htons(dom->len);
270 cache->gen = htons(dom->gen);
271 cache->member_cnt = htons(member_cnt);
272 cache->up_map = cpu_to_be64(dom->up_map);
273 mon_apply_domain(mon, self);
276 /* mon_update_neighbors() : update preceding neighbors of added/removed peer
/* Re-applies the domain record of the given peer and of the peers preceding it
 * in the list; dom_size(mon->peer_cnt) entries are visited in total.
 */
278 static void mon_update_neighbors(struct tipc_monitor *mon,
279 struct tipc_peer *peer)
283 dz = dom_size(mon->peer_cnt);
284 for (i = 0; i < dz; i++) {
285 mon_apply_domain(mon, peer);
/* Step backwards through the circular list */
286 peer = peer_prev(peer);
290 /* mon_assign_roles() : reassign peer roles after a network change
291 * The monitor list is consistent at this stage; i.e., each peer is monitoring
292 * a set of domain members as matched between domain record and the monitor list
/* Walks the list from the successor of head until self is reached and updates
 * the is_local / is_head flags of the peers on the way.
 */
294 static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
296 struct tipc_peer *peer = peer_nxt(head);
297 struct tipc_peer *self = mon->self;
300 for (; peer != self; peer = peer_nxt(peer)) {
/* Cleared first; set again below for some peers */
301 peer->is_local = false;
303 /* Update domain member */
/* i is compared against the number of members applied for the current head */
304 if (i++ < head->applied) {
305 peer->is_head = false;
307 peer->is_local = true;
310 /* Assign next domain head */
316 head->is_head = true;
/* tipc_mon_remove_peer(): unlink the peer with address @addr from the monitor
 * of bearer @bearer_id and update the remaining peers.
 * All list manipulation is done with mon->lock held for writing.
 */
322 void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
324 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
325 struct tipc_peer *self = get_self(net, bearer_id);
326 struct tipc_peer *peer, *prev, *head;
328 write_lock_bh(&mon->lock);
329 peer = get_peer(mon, addr);
/* Take the predecessor before unlinking; it anchors the updates below */
332 prev = peer_prev(peer);
/* Remove from both the ordered list and the hash table */
333 list_del(&peer->list);
334 hlist_del(&peer->hash);
338 head = peer_head(prev);
340 mon_update_local_domain(mon);
341 mon_update_neighbors(mon, prev);
343 /* Revert to full-mesh monitoring if we reach threshold */
344 if (!tipc_mon_is_active(net, mon)) {
345 list_for_each_entry(peer, &self->list, list) {
351 mon_assign_roles(mon, head);
353 write_unlock_bh(&mon->lock);
/* tipc_mon_add_peer(): allocate an entry for @addr and link it into the hash
 * table and the address-ordered circular list.
 * NOTE(review): @peer is presumably an output parameter receiving the new
 * entry, as the caller passes &peer - confirm.
 */
356 static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
357 struct tipc_peer **peer)
359 struct tipc_peer *self = mon->self;
360 struct tipc_peer *cur, *prev, *p;
/* Zeroed, non-sleeping allocation */
362 p = kzalloc(sizeof(*p), GFP_ATOMIC);
368 /* Add new peer to lookup list */
369 INIT_LIST_HEAD(&p->list);
370 hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
372 /* Sort new peer into iterator list, in ascending circular order */
374 list_for_each_entry(cur, &self->list, list) {
/* Plain case: addr lies strictly between two neighbouring entries */
375 if ((addr > prev->addr) && (addr < cur->addr))
/* Wrap case: prev > cur marks where the circular ordering wraps around */
377 if (((addr < cur->addr) || (addr > prev->addr)) &&
378 (prev->addr > cur->addr))
/* list_add_tail() on cur's node inserts the new entry right before cur */
382 list_add_tail(&p->list, &cur->list);
384 mon_update_neighbors(mon, p);
/* tipc_mon_peer_up(): handle the peer with address @addr coming up on bearer
 * @bearer_id; the peer is created first if it is not yet known.
 * Runs with mon->lock held for writing.
 */
388 void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
390 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
391 struct tipc_peer *self = get_self(net, bearer_id);
392 struct tipc_peer *peer, *head;
394 write_lock_bh(&mon->lock);
395 peer = get_peer(mon, addr);
/* Unknown peer: try to add it; the failure case is tested for here */
396 if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
399 head = peer_head(peer);
401 mon_update_local_domain(mon);
402 mon_assign_roles(mon, head);
404 write_unlock_bh(&mon->lock);
/* tipc_mon_peer_down(): handle the peer with address @addr going down on
 * bearer @bearer_id. Runs with mon->lock held for writing.
 */
407 void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
409 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
410 struct tipc_peer *self = get_self(net, bearer_id);
411 struct tipc_peer *peer, *head;
412 struct tipc_mon_domain *dom;
415 write_lock_bh(&mon->lock);
416 peer = get_peer(mon, addr);
418 pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
/* Keep the applied count for the lost-member scan below */
421 applied = peer->applied;
426 mon_identify_lost_members(peer, dom, applied);
/* A peer that is down holds neither the head nor the local-member role */
429 peer->is_head = false;
430 peer->is_local = false;
432 head = peer_head(peer);
434 mon_update_local_domain(mon);
435 mon_assign_roles(mon, head);
437 write_unlock_bh(&mon->lock);
440 /* tipc_mon_rcv - process monitor domain event message
442 void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
443 struct tipc_mon_state *state, int bearer_id)
445 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
446 struct tipc_mon_domain *arrv_dom = data;
447 struct tipc_mon_domain dom_bef;
448 struct tipc_mon_domain *dom;
449 struct tipc_peer *peer;
450 u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
451 int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
452 u16 new_gen = ntohs(arrv_dom->gen);
453 u16 acked_gen = ntohs(arrv_dom->ack_gen);
454 bool probing = state->probing;
457 state->probing = false;
461 /* Sanity check received domain record */
462 if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
463 pr_warn_ratelimited("Received illegal domain record\n");
467 /* Synch generation numbers with peer if link just came up */
468 if (!state->synched) {
469 state->peer_gen = new_gen - 1;
470 state->acked_gen = acked_gen;
471 state->synched = true;
474 if (more(acked_gen, state->acked_gen))
475 state->acked_gen = acked_gen;
477 /* Drop duplicate unless we are waiting for a probe response */
478 if (!more(new_gen, state->peer_gen) && !probing)
481 write_lock_bh(&mon->lock);
482 peer = get_peer(mon, addr);
483 if (!peer || !peer->is_up)
486 /* Peer is confirmed, stop any ongoing probing */
489 /* Task is done for duplicate record */
490 if (!more(new_gen, state->peer_gen))
493 state->peer_gen = new_gen;
495 /* Cache current domain record for later use */
496 dom_bef.member_cnt = 0;
499 memcpy(&dom_bef, dom, dom->len);
501 /* Transform and store received domain record */
502 if (!dom || (dom->len < new_dlen)) {
504 dom = kmalloc(new_dlen, GFP_ATOMIC);
511 dom->member_cnt = new_member_cnt;
512 dom->up_map = be64_to_cpu(arrv_dom->up_map);
513 for (i = 0; i < new_member_cnt; i++)
514 dom->members[i] = ntohl(arrv_dom->members[i]);
516 /* Update peers affected by this domain record */
517 applied_bef = peer->applied;
518 mon_apply_domain(mon, peer);
519 mon_identify_lost_members(peer, &dom_bef, applied_bef);
520 mon_assign_roles(mon, peer_head(peer));
522 write_unlock_bh(&mon->lock);
/* tipc_mon_prep(): fill @data with the domain record to send to a peer.
 * Either a header-only record carrying just generation and ack numbers, or
 * a copy of the cached full record, is produced. All fields written are in
 * network byte order.
 */
525 void tipc_mon_prep(struct net *net, void *data, int *dlen,
526 struct tipc_mon_state *state, int bearer_id)
528 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
529 struct tipc_mon_domain *dom = data;
/* Read without holding mon->lock */
530 u16 gen = mon->dom_gen;
533 if (!tipc_mon_is_active(net, mon))
536 /* Send only a dummy record with ack if peer has acked our last sent */
537 if (likely(state->acked_gen == gen)) {
/* Length of a record with zero members, i.e. the header alone */
538 len = dom_rec_len(dom, 0);
540 dom->len = htons(len);
541 dom->gen = htons(gen);
542 dom->ack_gen = htons(state->peer_gen);
546 /* Send the full record */
547 read_lock_bh(&mon->lock);
/* The cache is already in network byte order, so it is copied as is */
548 len = ntohs(mon->cache.len);
550 memcpy(data, &mon->cache, len);
551 read_unlock_bh(&mon->lock);
/* ack_gen is per link, so it is patched in after the copy */
552 dom->ack_gen = htons(state->peer_gen);
/* tipc_mon_get_state(): refresh the per-link monitor @state for the peer with
 * address @addr from the monitor's current view of that peer.
 */
555 void tipc_mon_get_state(struct net *net, u32 addr,
556 struct tipc_mon_state *state,
559 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
560 struct tipc_peer *peer;
562 /* Use cached state if table has not changed */
/* These generation counters are compared without taking mon->lock */
563 if (!state->probing &&
564 (state->list_gen == mon->list_gen) &&
565 (state->acked_gen == mon->dom_gen))
568 read_lock_bh(&mon->lock);
569 peer = get_peer(mon, addr);
/* Probe while the peer has not acked our latest domain generation ... */
571 state->probing = state->acked_gen != mon->dom_gen;
/* ... or while the peer has a non-zero down count */
572 state->probing |= peer->down_cnt;
/* reset is only ever set here, never cleared */
573 state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
/* Monitor the peer if it is a local domain member or a domain head */
574 state->monitoring = peer->is_local;
575 state->monitoring |= peer->is_head;
576 state->list_gen = mon->list_gen;
578 read_unlock_bh(&mon->lock);
/* mon_timeout(): timer handler; @m is the monitor pointer that was cast to
 * unsigned long when the timer was set up. Re-evaluates the local domain
 * size and re-arms the timer.
 */
581 static void mon_timeout(unsigned long m)
583 struct tipc_monitor *mon = (void *)m;
584 struct tipc_peer *self;
/* Computed from peer_cnt before mon->lock is taken */
585 int best_member_cnt = dom_size(mon->peer_cnt) - 1;
587 write_lock_bh(&mon->lock);
/* Only rebuild when the ideal member count differs from what is applied */
589 if (self && (best_member_cnt != self->applied)) {
590 mon_update_local_domain(mon);
591 mon_assign_roles(mon, self);
593 write_unlock_bh(&mon->lock);
/* Re-arm for the next period */
594 mod_timer(&mon->timer, jiffies + mon->timer_intv);
/* tipc_mon_create(): allocate and register the monitor for bearer @bearer_id,
 * including its own peer entry and domain record, and start its timer.
 */
597 int tipc_mon_create(struct net *net, int bearer_id)
599 struct tipc_net *tn = tipc_net(net);
600 struct tipc_monitor *mon;
601 struct tipc_peer *self;
602 struct tipc_mon_domain *dom;
/* A monitor already registered for this bearer is tested for here */
604 if (tn->monitors[bearer_id])
/* All three objects are allocated first and checked together */
607 mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
608 self = kzalloc(sizeof(*self), GFP_ATOMIC);
609 dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
610 if (!mon || !self || !dom) {
616 tn->monitors[bearer_id] = mon;
617 rwlock_init(&mon->lock);
622 self->addr = tipc_own_addr(net);
/* The own entry starts out as a domain head */
624 self->is_head = true;
625 INIT_LIST_HEAD(&self->list);
626 setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
/* Period is MON_TIMEOUT ms plus the low 16 bits of tn->random as extra ms */
627 mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
628 mod_timer(&mon->timer, jiffies + mon->timer_intv);
/* tipc_mon_delete(): unregister the monitor of bearer @bearer_id, unlink all
 * of its peers and stop its timer.
 * NOTE(review): mon is used (get_self(), mon->lock) with no NULL check visible
 * here - confirm that callers only invoke this for an existing monitor.
 */
632 void tipc_mon_delete(struct net *net, int bearer_id)
634 struct tipc_net *tn = tipc_net(net);
635 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
636 struct tipc_peer *self = get_self(net, bearer_id);
637 struct tipc_peer *peer, *tmp;
639 write_lock_bh(&mon->lock);
/* Unpublish the monitor so later lookups for this bearer find nothing */
640 tn->monitors[bearer_id] = NULL;
/* _safe variant: entries are unlinked while iterating */
641 list_for_each_entry_safe(peer, tmp, &self->list, list) {
642 list_del(&peer->list);
643 hlist_del(&peer->hash);
648 write_unlock_bh(&mon->lock);
/* Stopped after the lock is dropped; mon_timeout() itself takes mon->lock */
649 del_timer_sync(&mon->timer);
/* tipc_nl_monitor_set_threshold(): store @cluster_size as the namespace's
 * monitoring threshold (the value tipc_mon_is_active() compares against).
 */
655 int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
657 struct tipc_net *tn = tipc_net(net);
/* NOTE(review): values above TIPC_CLUSTER_SIZE are presumably rejected - confirm */
659 if (cluster_size > TIPC_CLUSTER_SIZE)
662 tn->mon_threshold = cluster_size;
667 int tipc_nl_monitor_get_threshold(struct net *net)
669 struct tipc_net *tn = tipc_net(net);
671 return tn->mon_threshold;
/* __tipc_nl_add_monitor_peer(): append one TIPC_NL_MON_PEER_GET message
 * describing @peer to the netlink buffer msg->skb. The attributes are nested
 * under TIPC_NLA_MON_PEER.
 */
674 int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, struct tipc_nl_msg *msg)
676 struct tipc_mon_domain *dom = peer->domain;
677 struct nlattr *attrs;
680 hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
681 NLM_F_MULTI, TIPC_NL_MON_PEER_GET);
685 attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER);
689 if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr))
691 if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied))
/* Boolean peer properties are encoded as flag attributes */
695 if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP))
698 if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL))
701 if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_HEAD))
/* Fields taken from the peer's domain record */
705 if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen))
707 if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP,
708 dom->up_map, TIPC_NLA_MON_PEER_PAD))
/* Member list is emitted as a raw array of member_cnt u32 values */
710 if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS,
711 dom->member_cnt * sizeof(u32), &dom->members))
/* Completion: close the nest and finalize the message */
715 nla_nest_end(msg->skb, attrs);
716 genlmsg_end(msg->skb, hdr);
/* Unwind: drop the partially built nest, then the message */
720 nla_nest_cancel(msg->skb, attrs);
722 genlmsg_cancel(msg->skb, hdr);
/* tipc_nl_add_monitor_peer(): dump the peers of the monitor on @bearer_id into
 * @msg, walking the circular list once starting at the monitor's own entry.
 * @prev_node: in/out cursor holding a peer address, used to continue a dump.
 * NOTE(review): mon->self is read in the initializer below, i.e. before any
 * NULL check on mon can run and outside mon->lock - confirm this is safe.
 */
727 int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
728 u32 bearer_id, u32 *prev_node)
730 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
731 struct tipc_peer *peer = mon->self;
736 read_lock_bh(&mon->lock);
739 if (peer->addr == *prev_node)
/* On failure to add a peer, remember its address in the cursor and unlock */
744 if (__tipc_nl_add_monitor_peer(peer, msg)) {
745 *prev_node = peer->addr;
746 read_unlock_bh(&mon->lock);
/* Stop once the walk is back at the monitor's own entry */
749 } while ((peer = peer_nxt(peer)) != mon->self);
750 read_unlock_bh(&mon->lock);
755 int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
758 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
759 char bearer_name[TIPC_MAX_BEARER_NAME];
760 struct nlattr *attrs;
764 ret = tipc_bearer_get_name(net, bearer_name, bearer_id);
768 hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
769 NLM_F_MULTI, TIPC_NL_MON_GET);
773 attrs = nla_nest_start(msg->skb, TIPC_NLA_MON);
777 read_lock_bh(&mon->lock);
778 if (nla_put_u32(msg->skb, TIPC_NLA_MON_REF, bearer_id))
780 if (tipc_mon_is_active(net, mon))
781 if (nla_put_flag(msg->skb, TIPC_NLA_MON_ACTIVE))
783 if (nla_put_string(msg->skb, TIPC_NLA_MON_BEARER_NAME, bearer_name))
785 if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEERCNT, mon->peer_cnt))
787 if (nla_put_u32(msg->skb, TIPC_NLA_MON_LISTGEN, mon->list_gen))
790 read_unlock_bh(&mon->lock);
791 nla_nest_end(msg->skb, attrs);
792 genlmsg_end(msg->skb, hdr);
797 read_unlock_bh(&mon->lock);
798 nla_nest_cancel(msg->skb, attrs);
800 genlmsg_cancel(msg->skb, hdr);