1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Generic address resolution entity
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
10 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
11 * Harald Welte Add neighbour cache statistics like rtstat
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 #include <linux/slab.h>
17 #include <linux/kmemleak.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
25 #include <linux/sysctl.h>
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
42 #include <trace/events/neigh.h>
45 #define neigh_dbg(level, fmt, ...) \
47 if (level <= NEIGH_DEBUG) \
48 pr_debug(fmt, ##__VA_ARGS__); \
51 #define PNEIGH_HASHMASK 0xF
53 static void neigh_timer_handler(struct timer_list *t);
54 static void __neigh_notify(struct neighbour *n, int type, int flags,
56 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
57 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
58 struct net_device *dev);
61 static const struct seq_operations neigh_stat_seq_ops;
65 Neighbour hash table buckets are protected with rwlock tbl->lock.
67 - All the scans/updates to hash buckets MUST be made under this lock.
68 - NOTHING clever should be made under this lock: no callbacks
69 to protocol backends, no attempts to send something to network.
70 It will result in deadlocks, if backend/driver wants to use neighbour
72 - If the entry requires some non-trivial actions, increase
73 its reference count and release table lock.
75 Neighbour entries are protected:
76 - with reference count.
77 - with rwlock neigh->lock
79 Reference count prevents destruction.
81 neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
86 Again, nothing clever shall be made under neigh->lock,
87 the most complicated procedure, which we allow is dev->hard_header.
88 It is supposed, that dev->hard_header is simplistic and does
89 not make callbacks to neighbour tables.
92 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
/* Final teardown of a removed entry: emit the tracepoint, notify netlink
 * (RTM_DELNEIGH) and netevent listeners, then drop our reference. */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
107 * It is random distribution in the interval (1/2)*base...(3/2)*base.
108 * It corresponds to default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
/* Randomize a reachability interval: uniform over [base/2, 3*base/2),
 * matching the default IPv6 behaviour; a zero @base yields zero. */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (base >> 1) + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
/* Mark @n dead and detach it from the GC candidate list, keeping the
 * gc_entries counter in sync.  (The dead-flag assignment itself is
 * elided in this excerpt.)  Caller holds tbl->lock. */
static void neigh_mark_dead(struct neighbour *n)
{
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}
/* Re-evaluate whether @n belongs on the forced-GC candidate list after a
 * state/flags change.  PERMANENT or externally-learned entries are exempt;
 * everything else must be listed.  (A dead-entry early-out between the
 * lock acquisitions is elided in this excerpt.) */
static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}
/* Sync the NTF_EXT_LEARNED flag on @neigh with a netlink update's @flags.
 * Admin-originated updates only.  (The trailing parameter(s), the local
 * declaration of ndm_flags, the else branch and the changed/notify return
 * plumbing are elided in this excerpt.) */
static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
	/* (remaining parameter list elided) */
	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		/* (non-admin early return elided) */

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		/* (else branch elided:) */
			neigh->flags &= ~NTF_EXT_LEARNED;
	}
/* Unlink @n from its hash chain (@np is the chain slot pointing at it)
 * when we hold the only reference.  Caller holds tbl->lock.  (The local
 * result flag, the neigh_mark_dead() call and the guarded return are
 * elided in this excerpt — the cleanup below runs only on actual removal.) */
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		/* splice the successor into the chain slot */
		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
	}
	write_unlock(&n->lock);

	neigh_cleanup_and_release(n);
/* Locate @ndel in its hash bucket and remove it via neigh_del().  Caller
 * holds tbl->lock.  (Locals for the hash value and cursor, the per-node
 * match test and the chain-advance/final-return lines are elided.) */
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		/* (n == ndel match check elided) */
			return neigh_del(n, np, tbl);
		/* (np = &n->next advance elided) */
	}
/* Synchronous GC pass over tbl->gc_list: reclaim up to
 * (gc_entries - gc_thresh2) unreferenced entries that are FAILED, NOARP,
 * multicast-keyed, or untouched for 5 seconds.  (The shrunk counter,
 * remove flag, break and return statements are elided in this excerpt.) */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;	/* "recently used" cutoff */
	struct neighbour *n, *tmp;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			/* only the table holds a reference: removable */
			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    time_after(tref, n->updated))
				/* (remove = true elided) */
			write_unlock(&n->lock);
		}
		if (remove && neigh_remove_one(n, tbl))
			/* (shrunk++ elided) */
		if (shrunk >= max_clean)
			/* (break elided) */
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);
/* Arm the entry's state-machine timer.  A timer that was already pending
 * indicates a refcount bug (each armed timer owns one reference; the
 * neigh_hold() and the printk continuation are elided in this excerpt). */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		/* (nud_state argument and dump_stack() elided) */
/* Cancel a pending state-machine timer.  (The reference drop for a
 * successfully cancelled timer and the return values are elided.) */
static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
/* Drop all proxy-queue skbs belonging to @net (all of them when @net is
 * NULL).  Matching skbs are moved to a private list under the queue lock,
 * then freed outside it.  (The flags/skb locals and the kfree_skb in the
 * final loop are elided in this excerpt.) */
static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
	struct sk_buff_head tmp;

	skb_queue_head_init(&tmp);
	spin_lock_irqsave(&list->lock, flags);
	skb = skb_peek(list);
	while (skb != NULL) {
		/* grab the successor before unlinking the current skb */
		struct sk_buff *skb_next = skb_peek_next(skb, list);
		if (net == NULL || net_eq(dev_net(skb->dev), net)) {
			__skb_unlink(skb, list);
			__skb_queue_tail(&tmp, skb);
		}
		/* (skb = skb_next advance elided) */
	}
	spin_unlock_irqrestore(&list->lock, flags);

	while ((skb = __skb_dequeue(&tmp))) {
		/* (dev_put/kfree_skb elided) */
/* Walk every hash bucket and evict entries for @dev (or all devices when
 * @dev is NULL), optionally sparing NUD_PERMANENT ones.  Caller holds
 * tbl->lock.  (Loop locals, the chain-advance on skipped entries, the
 * dead-marking and timer teardown lines are elided in this excerpt.) */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
	/* (skip_perm parameter line elided) */
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				/* (np = &n->next; continue; elided) */
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				/* (np = &n->next; continue; elided) */
			}
			/* unhook from the chain, then tear the entry down */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			/* (neigh_del_timer/mark_dead elided) */
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				/* (else branch elided:) */
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
/* Flush every cached entry for @dev (including permanent ones,
 * skip_perm=false) after the device's hardware address changed. */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
/* Common helper for carrier-down/ifdown: flush cache entries and proxy
 * (pneigh) entries for @dev, purge its proxy queue, and stop the proxy
 * timer once the queue is empty.  Note pneigh_ifdown_and_unlock() drops
 * tbl->lock itself.  (Return statement elided in this excerpt.) */
static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
	/* (skip_perm parameter line elided) */
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);
	pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev));
	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		del_timer_sync(&tbl->proxy_timer);
/* Carrier loss: flush @dev's entries but keep NUD_PERMANENT ones
 * (skip_perm=true).  (The return statement is elided in this excerpt.) */
int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
}
EXPORT_SYMBOL(neigh_carrier_down);
/* Device down: flush everything for @dev, permanent entries included
 * (skip_perm=false).  (The return statement is elided in this excerpt.) */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
}
EXPORT_SYMBOL(neigh_ifdown);
/* Allocate and initialise a new cache entry.  When GC thresholds are
 * exceeded a forced GC pass runs first; hard overflow refuses the
 * allocation.  (The entries local, the exempt_from_gc fast path, NULL
 * checks, the tbl->id printk argument, n->flags / n->tbl / n->dead
 * assignments and the out/out_entries labels are elided in this excerpt.) */
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u8 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			/* GC could not make room: refuse the allocation */
			net_info_ratelimited("%s: neighbor table overflow!\n",
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
		}
	}

	/* entry_size already accounts for the key; add device private room */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;	/* drop packets until resolved */
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	refcount_set(&n->refcnt, 1);
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);

	/* failure path: undo the gc_entries reservation */
	atomic_dec(&tbl->gc_entries);
/* Fill one hash seed; forcing the low bit keeps it odd and non-zero.
 * (Closing brace elided in this excerpt.) */
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
/* Allocate a hash table with 2^shift buckets: kzalloc for page-or-smaller
 * tables, whole zeroed pages (tracked via kmemleak) otherwise, plus fresh
 * random seeds.  (NULL checks, the get_order() argument, the else keyword
 * and the final return are elided in this excerpt.) */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);

	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
		/* page allocations bypass slab: register with kmemleak */
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}

	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
/* RCU callback freeing a retired hash table; mirrors neigh_hash_alloc():
 * kfree for small tables, free_pages (+ kmemleak_free) for page-backed
 * ones.  (The small-table kfree and final kfree(nht) are elided.) */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		/* (kfree(buckets) elided) */
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
/* Grow the hash table to 2^new_shift buckets and rehash every entry into
 * the new table under tbl->lock; readers keep using the old table until
 * the RCU-deferred free.  (The allocation-failure check, the for-loop
 * update clause and the final return are elided in this excerpt.) */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
			/* recompute with the new table's seeds */
			hash = tbl->hash(n->primary_key, n->dev,

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* push onto the head of the new bucket's chain */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
					   lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
/* Look up an entry by key+device and take a reference on it under RCU-BH.
 * An entry whose refcount already hit zero is treated as a miss.  (The
 * rcu_read_lock_bh(), the n-NULL branch structure and the return are
 * elided in this excerpt.) */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	NEIGH_CACHE_STAT_INC(tbl, lookups);

	n = __neigh_lookup_noref(tbl, pkey, dev);
		/* refuse entries already on their way out */
		if (!refcount_inc_not_zero(&n->refcnt))
		NEIGH_CACHE_STAT_INC(tbl, hits);

	rcu_read_unlock_bh();
/* Device-independent lookup: scan the bucket for a key match in @net and
 * take a reference.  (The pkey parameter line, rcu_read_lock_bh(), the
 * n-NULL miss handling, the goto-out on refcount failure and the returns
 * are elided in this excerpt.) */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
	unsigned int key_len = tbl->key_len;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     /* (continuation condition elided) */
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
			NEIGH_CACHE_STAT_INC(tbl, hits);
		}
	}

	rcu_read_unlock_bh();
/* Core creation path: allocate, run protocol/device/parms constructors,
 * then insert into the hash table under tbl->lock (growing it when the
 * load factor exceeds one entry per bucket).  A concurrent insert of the
 * same key wins and is returned instead.  (The error local, NULL checks,
 * n->dev assignment, duplicate-hit handling, want_ref/neigh_hold, the
 * n->dead clear and the out/out_tbl_unlock/out_neigh_release label bodies
 * are elided in this excerpt.) */
static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u8 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	/* allocation failure maps to -ENOBUFS */
	rc = ERR_PTR(-ENOBUFS);

	memcpy(n->primary_key, pkey, key_len);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
			goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		goto out_neigh_release;
	}

	/* back-date confirmed so the first use triggers resolution */
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* parms died while we were constructing: abort the insert */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
	}

	/* duplicate scan: a racing creator of the same key wins */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     /* (continuation condition elided) */
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			/* (return existing entry; handling elided) */
		}
	}

	/* GC-managed entries go on the candidate list */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);

	/* error unwind: */
	write_unlock_bh(&tbl->lock);

	atomic_dec(&tbl->gc_entries);
/* Public creation entry point: no extra flags, entry is subject to GC
 * (exempt_from_gc=false). */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
696 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
698 u32 hash_val = *(u32 *)(pkey + key_len - 4);
699 hash_val ^= (hash_val >> 16);
700 hash_val ^= hash_val >> 8;
701 hash_val ^= hash_val >> 4;
702 hash_val &= PNEIGH_HASHMASK;
/* Walk a pneigh chain starting at @n for a key+net match; a wildcard
 * entry (n->dev == NULL) matches any device.  (The pkey/net parameter
 * lines, the chain loop frame and the returns are elided in this
 * excerpt.) */
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      unsigned int key_len,
					      struct net_device *dev)
{
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			/* (return n elided) */
/* Lockless proxy-entry lookup helper.  NOTE(review): no locking here —
 * presumably the caller holds tbl->lock; confirm against callers. */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
/* Find a proxy entry, optionally creating it when @creat is set: allocate
 * (GFP_KERNEL), copy the key, run the table's pconstructor, then link into
 * the bucket under tbl->lock.  (The found/!creat early return, NULL check,
 * n->dev assignment with dev_hold, constructor-failure unwind and the
 * final return are elided in this excerpt.) */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	/* key is stored inline after the struct */
	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* (constructor failure cleanup elided) */
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
/* Remove the exact-match proxy entry (key+dev+net), then release it via
 * pdestructor outside the lock.  (The loop's unlink/advance statement,
 * the pdestructor call body, dev_put/kfree and the -ENOENT return are
 * elided in this excerpt.) */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			/* (unlink *np = n->next elided) */
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
		}
	}
	write_unlock_bh(&tbl->lock);
/* Collect every proxy entry matching @dev (or all, when @dev is NULL)
 * onto a private free list under tbl->lock — which the CALLER acquired
 * and this function releases — then destroy them outside the lock.
 * (Unlink/relink statements, dev_put/kfree and the return are elided.) */
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				/* (move n to freelist elided) */
			}
			/* (np = &n->next advance elided) */
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		/* (freelist advance elided) */
		if (tbl->pdestructor)
			/* (pdestructor/dev_put/kfree elided) */
	}
static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop one reference on @parms; the last put destroys it. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
/*
 *	neighbour must already be out of the table;
 *	called when the last reference is dropped.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	/* (!neigh->dead sanity check elided in this excerpt:) */
		pr_warn("Destroying alive neighbour %p\n", neigh);

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");	/* timer should be gone already */

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	/* (dev_put elided in this excerpt) */
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);	/* readers may still be traversing chains */
}
EXPORT_SYMBOL(neigh_destroy);
/* Neighbour state is suspicious;
   disable fast path (switch back to the slow, resolving output handler).

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}
/* Neighbour state is OK;
   enable fast path (connected output handler, no resolution needed).

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
/* Periodic background GC (delayed work): refresh randomized reachable
 * times every 300s, then sweep buckets releasing unreferenced entries
 * that are FAILED or idle past GC_STALETIME.  Drops/retakes tbl->lock
 * between buckets to bound latency.  (Several locals, the per-parms
 * reachable_time assignment, the goto-out fast path, the chain
 * unlink/advance statements and the dead-marking are elided.) */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour __rcu **np;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			/* (p->reachable_time = ... elided:) */
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	/* below gc_thresh1 the table is small enough to leave alone */
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {

			write_lock(&n->lock);

			state = n->nud_state;
			/* permanent, timer-armed or externally learned
			 * entries are never reclaimed here */
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				/* (unlink from chain elided) */
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
			}
			write_unlock(&n->lock);
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		/* (cond_resched elided) */
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}

	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
/* Total probe budget for @n: unicast + application probes, plus either
 * multicast re-probes (while in NUD_PROBE) or initial multicast probes. */
static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
/* Resolution failed: report every queued skb as unreachable (dropping
 * neigh->lock around each callback to avoid recursion into the table)
 * and then purge the queue.  (The skb local declaration is elided.) */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
/* Send one solicitation for @neigh, using (a clone of) the most recently
 * queued skb as context.  Releases neigh->lock before calling out to the
 * protocol's solicit handler.  (The trailing consume_skb is elided.) */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	/* (if (skb) guard elided:) */
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
/* Called when a timer expires for a neighbour entry. */

/* NUD state-machine tick: REACHABLE may decay to DELAY/STALE, DELAY may
 * recover to REACHABLE or advance to PROBE; INCOMPLETE/PROBE retransmit
 * until neigh_max_probes() is exhausted, then fail.  Re-arms the timer
 * while a timed state remains.  (Several locals, the notify flag, the
 * out label and the neigh_probe call are elided in this excerpt.) */
static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	/* (now = jiffies; next = now + HZ; elided) */

	/* stale timer fire after the state already left a timed state */
	if (!(state & NUD_IN_TIMER))

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
			   /* (neigh->used + ... elided:) */
			   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
			   /* (neigh->confirmed + ... elided:) */
			   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* confirmation arrived while we were delaying */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* clamp to a minimum 10ms re-arm to avoid busy looping */
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			/* (neigh_hold elided) */
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* (neigh_probe path elided — unlocks neigh->lock) */
	}
	write_unlock(&neigh->lock);

	neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);	/* drop the timer's reference */
/* Kick resolution for @neigh on transmit.  Connected/delaying states pass
 * through; NONE starts INCOMPLETE probing (or fails immediately when no
 * probes are configured); STALE moves to DELAY.  While INCOMPLETE, @skb
 * is queued (bounded by QUEUE_LEN_BYTES, oldest dropped first).  Returns
 * via elided paths whether the caller must wait.  (The rc local, dead
 * check, several gotos/labels and kfree_skb calls are elided.) */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		/* (goto out_unlock_bh elided — nothing to do) */

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* no probes configured: fail straight away */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);
			/* (kfree_skb(skb); return 1; elided) */
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		/* (if (skb) guard elided) */
		while (neigh->arp_queue_len_bytes + skb->truesize >
		       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
			struct sk_buff *buff;

			/* oldest queued packet is sacrificed first */
			buff = __skb_dequeue(&neigh->arp_queue);
			neigh->arp_queue_len_bytes -= buff->truesize;
			NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
		}
		__skb_queue_tail(&neigh->arp_queue, skb);
		neigh->arp_queue_len_bytes += skb->truesize;
	}

	if (immediate_probe)
		/* (neigh_probe — unlocks; else branch below: ) */
	write_unlock(&neigh->lock);

	trace_neigh_event_send_done(neigh, rc);

	/* dead-entry path: */
	if (neigh->nud_state & NUD_STALE)
	write_unlock_bh(&neigh->lock);

	trace_neigh_event_send_dead(neigh, 1);
/* Refresh the cached hardware header after neigh->ha changed, via the
 * device's header_ops->cache_update, under the hh seqlock.  (The update
 * initializer/guard and the hh iteration frame are elided.) */
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

		/* only rewrite headers that were actually cached */
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
1224 /* Generic update routine.
1225 -- lladdr is new lladdr or NULL, if it is not supplied.
1226 -- new is new state.
1228 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1230 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1231 lladdr instead of overriding it
1233 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1234 NEIGH_UPDATE_F_USE means that the entry is user triggered.
1235 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1237 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1240 Caller MUST hold reference count on the entry.
/* Core update routine implementing the contract documented above: apply a
 * new lladdr and/or NUD state under neigh->lock, honouring the
 * NEIGH_UPDATE_F_* policy flags, refreshing the hh cache and flushing the
 * arp_queue through the (possibly re-looked-up) output path when the
 * entry becomes valid.  Caller MUST hold a reference.  (Multiple locals,
 * the dead check body, several gotos/labels and else branches are elided
 * in this excerpt.) */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	struct net_device *dev;
	int update_isrouter = 0;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	old = neigh->nud_state;
	/* (dev = neigh->dev; err init elided) */

	/* (if (neigh->dead) { ... } elided:) */
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");

	/* non-admin updates may not touch NOARP/PERMANENT entries */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
	if (flags & NEIGH_UPDATE_F_USE) {
		new = old & ~NUD_PERMANENT;
		neigh->nud_state = new;
		/* (err = 0; goto out; elided) */
	}

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		/* (err = 0; elided) */
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			/* (notify = 1; goto out; elided) */
		}
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		/* (lladdr = neigh->ha; elided) */
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			/* (lladdr = neigh->ha; elided) */
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			/* (goto out; elided) */
		}
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* (keep old lladdr, mark STALE — elided) */
			}
		}
		if (lladdr == neigh->ha && new == NUD_STALE &&
		    !(flags & NEIGH_UPDATE_F_ADMIN))
			/* (new = old; elided) */
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	/* (if (new != old) { elided:) */
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
		neigh->nud_state = new;
		/* (notify = 1; elided) */

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* back-date confirmed for non-connected states so the
		 * next use re-resolves promptly */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
	}

	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	/* (else:) */
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			/* (rcu_read_lock elided) */

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				/* (if (n2) n1 = n2; elided) */
			}
			n1->output(n1, skb);
			/* (release n2, rcu_read_unlock elided) */

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
/* out: */
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	/* (if (notify) elided:) */
		neigh_update_notify(neigh, nlmsg_pid);

	trace_neigh_update_done(neigh, err);
/* Public wrapper around __neigh_update() with no extack. */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	/* (dead-entry early return elided in this excerpt) */
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		/* (return — only FAILED entries are revived; elided) */
	neigh->nud_state = NUD_INCOMPLETE;
	/* budget exhausted on purpose: listen, but send no more probes */
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
}
EXPORT_SYMBOL(__neigh_set_probe_once);
/* neigh_event_ns - react to a received neighbour solicitation/probe.
 * Look up (and possibly create — creation is requested when a link-layer
 * address was supplied or the device has a zero-length hw address) the
 * entry for @saddr on @dev, then mark it NUD_STALE with the new lladdr.
 */
1459 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1460 u8 *lladdr, void *saddr,
1461 struct net_device *dev)
1463 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1464 lladdr || !dev->addr_len)
1466 neigh_update(neigh, lladdr, NUD_STALE,
1467 NEIGH_UPDATE_F_OVERRIDE, 0);
1470 EXPORT_SYMBOL(neigh_event_ns);
1472 /* called with read_lock_bh(&n->lock); */
/* Populate the cached hardware header (n->hh) for this neighbour by
 * asking the device's header_ops->cache() callback, under the write
 * side of n->lock so only one thread initializes it.
 */
1473 static void neigh_hh_init(struct neighbour *n)
1475 struct net_device *dev = n->dev;
1476 __be16 prot = n->tbl->protocol;
1477 struct hh_cache *hh = &n->hh;
1479 write_lock_bh(&n->lock);
1481 /* Only one thread can come in here and initialize the
1485 dev->header_ops->cache(n, hh, prot);
1487 write_unlock_bh(&n->lock);
1490 /* Slow and careful. */
/* Slow-path transmit: first kick address resolution via
 * neigh_event_send(); if the entry is usable, optionally warm the
 * hardware-header cache, then build the link-layer header from
 * neigh->ha under the ha_lock seqlock retry loop and hand the skb to
 * dev_queue_xmit().
 */
1492 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1496 if (!neigh_event_send(neigh, skb)) {
1498 struct net_device *dev = neigh->dev;
1501 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1502 neigh_hh_init(neigh);
1505 __skb_pull(skb, skb_network_offset(skb));
1506 seq = read_seqbegin(&neigh->ha_lock);
1507 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1508 neigh->ha, NULL, skb->len);
1509 } while (read_seqretry(&neigh->ha_lock, seq));
1512 rc = dev_queue_xmit(skb);
1523 EXPORT_SYMBOL(neigh_resolve_output);
1525 /* As fast as possible without hh cache */
/* Fast-path transmit for connected neighbours: build the hardware
 * header from neigh->ha (stabilized by the ha_lock seqlock retry
 * loop) and queue the skb for transmission. No resolution is kicked.
 */
1527 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1529 struct net_device *dev = neigh->dev;
1534 __skb_pull(skb, skb_network_offset(skb));
1535 seq = read_seqbegin(&neigh->ha_lock);
1536 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1537 neigh->ha, NULL, skb->len);
1538 } while (read_seqretry(&neigh->ha_lock, seq));
1541 err = dev_queue_xmit(skb);
1548 EXPORT_SYMBOL(neigh_connected_output);
/* Pass-through output: no link-layer header is needed, just queue the
 * skb for transmission.
 */
1550 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1552 return dev_queue_xmit(skb);
1554 EXPORT_SYMBOL(neigh_direct_output);
/* Timer handler for tbl->proxy_timer: walk the delayed proxy queue
 * under its spinlock, replay due skbs through tbl->proxy_redo() (only
 * if the device is still running), and re-arm the timer for the
 * earliest remaining entry.
 */
1556 static void neigh_proxy_process(struct timer_list *t)
1558 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1559 long sched_next = 0;
1560 unsigned long now = jiffies;
1561 struct sk_buff *skb, *n;
1563 spin_lock(&tbl->proxy_queue.lock);
1565 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
/* tdif <= 0 means this entry's scheduled time has arrived */
1566 long tdif = NEIGH_CB(skb)->sched_next - now;
1569 struct net_device *dev = skb->dev;
1571 __skb_unlink(skb, &tbl->proxy_queue);
1572 if (tbl->proxy_redo && netif_running(dev)) {
1574 tbl->proxy_redo(skb);
/* not yet due: remember the soonest deadline for the re-arm below */
1581 } else if (!sched_next || tdif < sched_next)
1584 del_timer(&tbl->proxy_timer);
1586 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1587 spin_unlock(&tbl->proxy_queue.lock);
/* Queue a proxy reply to be (re)processed after a random delay of up
 * to PROXY_DELAY jiffies. Drops (path elided here) when the queue
 * exceeds PROXY_QLEN. If the proxy timer is already pending sooner
 * than our deadline, keep the earlier expiry.
 */
1590 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1591 struct sk_buff *skb)
1593 unsigned long sched_next = jiffies +
1594 prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));
1596 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1601 NEIGH_CB(skb)->sched_next = sched_next;
1602 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1604 spin_lock(&tbl->proxy_queue.lock);
1605 if (del_timer(&tbl->proxy_timer)) {
1606 if (time_before(tbl->proxy_timer.expires, sched_next))
1607 sched_next = tbl->proxy_timer.expires;
1611 __skb_queue_tail(&tbl->proxy_queue, skb);
1612 mod_timer(&tbl->proxy_timer, sched_next);
1613 spin_unlock(&tbl->proxy_queue.lock);
1615 EXPORT_SYMBOL(pneigh_enqueue);
/* Find the neigh_parms for (net, ifindex) on this table. ifindex == 0
 * selects the table's default parms, which only match in init_net.
 */
1617 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1618 struct net *net, int ifindex)
1620 struct neigh_parms *p;
1622 list_for_each_entry(p, &tbl->parms_list, list) {
1623 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1624 (!p->dev && !ifindex && net_eq(net, &init_net)))
/* Allocate per-device neighbour parameters by cloning the table's
 * defaults, give the device's ndo_neigh_setup() a veto, and link the
 * new parms into tbl->parms_list under the table write lock.
 * Returns NULL on failure (paths elided in this extract).
 */
1631 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1632 struct neigh_table *tbl)
1634 struct neigh_parms *p;
1635 struct net *net = dev_net(dev);
1636 const struct net_device_ops *ops = dev->netdev_ops;
1638 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1641 refcount_set(&p->refcnt, 1);
1643 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1646 write_pnet(&p->net, net);
1647 p->sysctl_table = NULL;
/* driver may reject or customize the parms for this device */
1649 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1655 write_lock_bh(&tbl->lock);
1656 list_add(&p->list, &tbl->parms.list);
1657 write_unlock_bh(&tbl->lock);
1659 neigh_parms_data_state_cleanall(p);
1663 EXPORT_SYMBOL(neigh_parms_alloc);
/* RCU callback: drop the final reference on a neigh_parms after a
 * grace period (scheduled by neigh_parms_release()).
 */
1665 static void neigh_rcu_free_parms(struct rcu_head *head)
1667 struct neigh_parms *parms =
1668 container_of(head, struct neigh_parms, rcu_head);
1670 neigh_parms_put(parms);
/* Unlink per-device parms from the table (the table's own default
 * parms are never released), drop the device reference, and free via
 * RCU so concurrent readers stay safe.
 */
1673 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1675 if (!parms || parms == &tbl->parms)
1677 write_lock_bh(&tbl->lock);
1678 list_del(&parms->list);
1680 write_unlock_bh(&tbl->lock);
1681 dev_put(parms->dev);
1682 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1684 EXPORT_SYMBOL(neigh_parms_release);
/* Final destructor for neigh_parms (body elided in this extract). */
1686 static void neigh_parms_destroy(struct neigh_parms *parms)
/* lockdep class for per-table proxy queues */
1691 static struct lock_class_key neigh_table_proxy_queue_class;
/* global registry of neighbour tables, indexed by NEIGH_* table id */
1693 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
/* One-time initialization of a neighbour table: default parms, percpu
 * statistics, /proc stats entry, hash tables (neighbour + proxy),
 * periodic GC work, proxy timer, and registration in neigh_tables[].
 * Allocation failures here panic — tables are created at boot/module
 * init and the stack cannot run without them.
 */
1696 void neigh_table_init(int index, struct neigh_table *tbl)
1698 unsigned long now = jiffies;
1699 unsigned long phsize;
1700 INIT_LIST_HEAD(&tbl->parms_list);
1701 INIT_LIST_HEAD(&tbl->gc_list);
1702 list_add(&tbl->parms.list, &tbl->parms_list);
1703 write_pnet(&tbl->parms.net, &init_net);
1704 refcount_set(&tbl->parms.refcnt, 1);
/* randomize reachable_time around BASE_REACHABLE_TIME */
1705 tbl->parms.reachable_time =
1706 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1708 tbl->stats = alloc_percpu(struct neigh_statistics);
1710 panic("cannot create neighbour cache statistics");
1712 #ifdef CONFIG_PROC_FS
1713 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1714 &neigh_stat_seq_ops, tbl))
1715 panic("cannot create neighbour proc dir entry");
/* initial neighbour hash: 2^3 = 8 buckets, grown on demand */
1718 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1720 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1721 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1723 if (!tbl->nht || !tbl->phash_buckets)
1724 panic("cannot allocate neighbour cache hashes");
1726 if (!tbl->entry_size)
1727 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1728 tbl->key_len, NEIGH_PRIV_ALIGN);
1730 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1732 rwlock_init(&tbl->lock);
1733 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1734 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1735 tbl->parms.reachable_time);
1736 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1737 skb_queue_head_init_class(&tbl->proxy_queue,
1738 &neigh_table_proxy_queue_class);
1740 tbl->last_flush = now;
1741 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1743 neigh_tables[index] = tbl;
1745 EXPORT_SYMBOL(neigh_table_init);
/* Tear down a neighbour table on module unload: deregister, stop GC
 * work and the proxy timer, purge queued proxy skbs, drop all entries,
 * then free the hash tables, /proc entry and percpu stats. A non-zero
 * entry count here indicates a refcount leak.
 */
1747 int neigh_table_clear(int index, struct neigh_table *tbl)
1749 neigh_tables[index] = NULL;
1750 /* It is not clean... Fix it to unload IPv6 module safely */
1751 cancel_delayed_work_sync(&tbl->gc_work);
1752 del_timer_sync(&tbl->proxy_timer);
1753 pneigh_queue_purge(&tbl->proxy_queue, NULL);
1754 neigh_ifdown(tbl, NULL);
1755 if (atomic_read(&tbl->entries))
1756 pr_crit("neighbour leakage\n");
1758 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1759 neigh_hash_free_rcu);
1762 kfree(tbl->phash_buckets);
1763 tbl->phash_buckets = NULL;
1765 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1767 free_percpu(tbl->stats);
1772 EXPORT_SYMBOL(neigh_table_clear);
/* Map an address family to its neighbour table (ARP, ND, DECnet);
 * returns NULL for unsupported families.
 */
1774 static struct neigh_table *neigh_find_table(int family)
1776 struct neigh_table *tbl = NULL;
1780 tbl = neigh_tables[NEIGH_ARP_TABLE];
1783 tbl = neigh_tables[NEIGH_ND_TABLE];
1786 tbl = neigh_tables[NEIGH_DN_TABLE];
/* Netlink attribute validation policy for RTM_*NEIGH messages.
 * Attributes from NDA_NH_ID onward require strict validation.
 */
1793 const struct nla_policy nda_policy[NDA_MAX+1] = {
1794 [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID },
1795 [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1796 [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1797 [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
1798 [NDA_PROBES] = { .type = NLA_U32 },
1799 [NDA_VLAN] = { .type = NLA_U16 },
1800 [NDA_PORT] = { .type = NLA_U16 },
1801 [NDA_VNI] = { .type = NLA_U32 },
1802 [NDA_IFINDEX] = { .type = NLA_U32 },
1803 [NDA_MASTER] = { .type = NLA_U32 },
1804 [NDA_PROTOCOL] = { .type = NLA_U8 },
1805 [NDA_NH_ID] = { .type = NLA_U32 },
1806 [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
/* RTM_DELNEIGH handler: validate the request, resolve device and
 * table, and either delete a proxy entry (NTF_PROXY) or force the
 * neighbour to NUD_FAILED via __neigh_update() and remove it from the
 * hash under the table lock.
 */
1809 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1810 struct netlink_ext_ack *extack)
1812 struct net *net = sock_net(skb->sk);
1814 struct nlattr *dst_attr;
1815 struct neigh_table *tbl;
1816 struct neighbour *neigh;
1817 struct net_device *dev = NULL;
1821 if (nlmsg_len(nlh) < sizeof(*ndm))
1824 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1826 NL_SET_ERR_MSG(extack, "Network address not specified");
1830 ndm = nlmsg_data(nlh);
1831 if (ndm->ndm_ifindex) {
1832 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1839 tbl = neigh_find_table(ndm->ndm_family);
1841 return -EAFNOSUPPORT;
1843 if (nla_len(dst_attr) < (int)tbl->key_len) {
1844 NL_SET_ERR_MSG(extack, "Invalid network address");
1848 if (ndm->ndm_flags & NTF_PROXY) {
1849 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1856 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1857 if (neigh == NULL) {
1862 err = __neigh_update(neigh, NULL, NUD_FAILED,
1863 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1864 NETLINK_CB(skb).portid, extack);
1865 write_lock_bh(&tbl->lock);
/* release the lookup ref; removal itself happens under tbl->lock */
1866 neigh_release(neigh);
1867 neigh_remove_one(neigh, tbl);
1868 write_unlock_bh(&tbl->lock);
/* RTM_NEWNEIGH handler: parse and validate attributes, then either
 * create/update a proxy entry (NTF_PROXY) or look up / create a
 * neighbour and apply the requested state via __neigh_update().
 * NLM_F_CREATE / NLM_F_EXCL / NLM_F_REPLACE have their usual netlink
 * create/exclusive/replace semantics; NUD_PERMANENT and
 * NTF_EXT_LEARNED entries are exempted from GC.
 */
1874 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1875 struct netlink_ext_ack *extack)
1877 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1878 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1879 struct net *net = sock_net(skb->sk);
1881 struct nlattr *tb[NDA_MAX+1];
1882 struct neigh_table *tbl;
1883 struct net_device *dev = NULL;
1884 struct neighbour *neigh;
1890 err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
1891 nda_policy, extack);
1897 NL_SET_ERR_MSG(extack, "Network address not specified");
1901 ndm = nlmsg_data(nlh);
1902 if (ndm->ndm_ifindex) {
1903 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1909 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1910 NL_SET_ERR_MSG(extack, "Invalid link address");
1915 tbl = neigh_find_table(ndm->ndm_family);
1917 return -EAFNOSUPPORT;
1919 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
1920 NL_SET_ERR_MSG(extack, "Invalid network address");
1924 dst = nla_data(tb[NDA_DST]);
1925 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1927 if (tb[NDA_PROTOCOL])
1928 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
1930 if (ndm->ndm_flags & NTF_PROXY) {
1931 struct pneigh_entry *pn;
/* proxy entries live in a separate hash; create on demand */
1934 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1936 pn->flags = ndm->ndm_flags;
1938 pn->protocol = protocol;
1945 NL_SET_ERR_MSG(extack, "Device not specified");
1949 if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
1954 neigh = neigh_lookup(tbl, dst, dev);
1955 if (neigh == NULL) {
1956 bool exempt_from_gc;
1958 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1963 exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
1964 ndm->ndm_flags & NTF_EXT_LEARNED;
1965 neigh = ___neigh_create(tbl, dst, dev,
1966 ndm->ndm_flags & NTF_EXT_LEARNED,
1967 exempt_from_gc, true);
1968 if (IS_ERR(neigh)) {
1969 err = PTR_ERR(neigh);
1973 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1975 neigh_release(neigh);
/* without NLM_F_REPLACE an existing entry is not overridden */
1979 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1980 flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
1981 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1985 neigh->protocol = protocol;
1986 if (ndm->ndm_flags & NTF_EXT_LEARNED)
1987 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1988 if (ndm->ndm_flags & NTF_ROUTER)
1989 flags |= NEIGH_UPDATE_F_ISROUTER;
1990 if (ndm->ndm_flags & NTF_USE)
1991 flags |= NEIGH_UPDATE_F_USE;
1993 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1994 NETLINK_CB(skb).portid, extack);
1995 if (!err && ndm->ndm_flags & NTF_USE) {
1996 neigh_event_send(neigh, NULL);
1999 neigh_release(neigh);
/* Emit one NDTA_PARMS nested attribute describing a neigh_parms set
 * (queue limits, probe counts, timers). Returns the nest end offset,
 * or cancels the nest on failure.
 */
2004 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2006 struct nlattr *nest;
2008 nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2013 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2014 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2015 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2016 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2017 /* approximative value for deprecated QUEUE_LEN (in packets) */
2018 nla_put_u32(skb, NDTPA_QUEUE_LEN,
2019 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2020 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2021 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2022 nla_put_u32(skb, NDTPA_UCAST_PROBES,
2023 NEIGH_VAR(parms, UCAST_PROBES)) ||
2024 nla_put_u32(skb, NDTPA_MCAST_PROBES,
2025 NEIGH_VAR(parms, MCAST_PROBES)) ||
2026 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2027 NEIGH_VAR(parms, MCAST_REPROBES)) ||
2028 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2030 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2031 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2032 nla_put_msecs(skb, NDTPA_GC_STALETIME,
2033 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2034 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2035 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2036 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2037 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2038 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2039 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2040 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2041 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2042 nla_put_msecs(skb, NDTPA_LOCKTIME,
2043 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
2044 goto nla_put_failure;
2045 return nla_nest_end(skb, nest);
2048 nla_nest_cancel(skb, nest);
/* Build a full RTM_NEWNEIGHTBL message for a table: GC thresholds and
 * interval, hash configuration (NDTA_CONFIG), aggregated percpu stats
 * (NDTA_STATS), and the table's default parms. The table read lock is
 * held across the fill to keep the values consistent.
 */
2052 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2053 u32 pid, u32 seq, int type, int flags)
2055 struct nlmsghdr *nlh;
2056 struct ndtmsg *ndtmsg;
2058 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2062 ndtmsg = nlmsg_data(nlh);
2064 read_lock_bh(&tbl->lock);
2065 ndtmsg->ndtm_family = tbl->family;
2066 ndtmsg->ndtm_pad1 = 0;
2067 ndtmsg->ndtm_pad2 = 0;
2069 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2070 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2071 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2072 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2073 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2074 goto nla_put_failure;
2076 unsigned long now = jiffies;
2077 long flush_delta = now - tbl->last_flush;
2078 long rand_delta = now - tbl->last_rand;
2079 struct neigh_hash_table *nht;
2080 struct ndt_config ndc = {
2081 .ndtc_key_len = tbl->key_len,
2082 .ndtc_entry_size = tbl->entry_size,
2083 .ndtc_entries = atomic_read(&tbl->entries),
2084 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
2085 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
2086 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
/* hash rnd/mask are read under RCU since the nht can be resized */
2090 nht = rcu_dereference_bh(tbl->nht);
2091 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2092 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2093 rcu_read_unlock_bh();
2095 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2096 goto nla_put_failure;
2101 struct ndt_stats ndst;
2103 memset(&ndst, 0, sizeof(ndst));
/* sum the per-CPU statistics into one ndt_stats record */
2105 for_each_possible_cpu(cpu) {
2106 struct neigh_statistics *st;
2108 st = per_cpu_ptr(tbl->stats, cpu);
2109 ndst.ndts_allocs += st->allocs;
2110 ndst.ndts_destroys += st->destroys;
2111 ndst.ndts_hash_grows += st->hash_grows;
2112 ndst.ndts_res_failed += st->res_failed;
2113 ndst.ndts_lookups += st->lookups;
2114 ndst.ndts_hits += st->hits;
2115 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
2116 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
2117 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
2118 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
2119 ndst.ndts_table_fulls += st->table_fulls;
2122 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2124 goto nla_put_failure;
2127 BUG_ON(tbl->parms.dev);
2128 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2129 goto nla_put_failure;
2131 read_unlock_bh(&tbl->lock);
2132 nlmsg_end(skb, nlh);
2136 read_unlock_bh(&tbl->lock);
2137 nlmsg_cancel(skb, nlh);
/* Like neightbl_fill_info() but for one per-device parms set: emits
 * only the table name plus a single NDTA_PARMS nest.
 */
2141 static int neightbl_fill_param_info(struct sk_buff *skb,
2142 struct neigh_table *tbl,
2143 struct neigh_parms *parms,
2144 u32 pid, u32 seq, int type,
2147 struct ndtmsg *ndtmsg;
2148 struct nlmsghdr *nlh;
2150 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2154 ndtmsg = nlmsg_data(nlh);
2156 read_lock_bh(&tbl->lock);
2157 ndtmsg->ndtm_family = tbl->family;
2158 ndtmsg->ndtm_pad1 = 0;
2159 ndtmsg->ndtm_pad2 = 0;
2161 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2162 neightbl_fill_parms(skb, parms) < 0)
2165 read_unlock_bh(&tbl->lock);
2166 nlmsg_end(skb, nlh);
2169 read_unlock_bh(&tbl->lock);
2170 nlmsg_cancel(skb, nlh);
/* Validation policy for RTM_SETNEIGHTBL table-level attributes. */
2174 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2175 [NDTA_NAME] = { .type = NLA_STRING },
2176 [NDTA_THRESH1] = { .type = NLA_U32 },
2177 [NDTA_THRESH2] = { .type = NLA_U32 },
2178 [NDTA_THRESH3] = { .type = NLA_U32 },
2179 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2180 [NDTA_PARMS] = { .type = NLA_NESTED },
/* Validation policy for the nested NDTA_PARMS attributes. */
2183 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2184 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2185 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2186 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2187 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2188 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2189 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2190 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2191 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2192 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2193 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2194 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2195 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2196 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2197 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
/* RTM_SETNEIGHTBL handler: find the table by NDTA_NAME (and family),
 * then under the table write lock apply NDTA_PARMS updates to the
 * matching neigh_parms and, for init_net only, the GC thresholds and
 * interval.
 */
2200 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2201 struct netlink_ext_ack *extack)
2203 struct net *net = sock_net(skb->sk);
2204 struct neigh_table *tbl;
2205 struct ndtmsg *ndtmsg;
2206 struct nlattr *tb[NDTA_MAX+1];
2210 err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2211 nl_neightbl_policy, extack);
2215 if (tb[NDTA_NAME] == NULL) {
2220 ndtmsg = nlmsg_data(nlh);
2222 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2223 tbl = neigh_tables[tidx];
2226 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2228 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2238 * We acquire tbl->lock to be nice to the periodic timers and
2239 * make sure they always see a consistent set of values.
2241 write_lock_bh(&tbl->lock);
2243 if (tb[NDTA_PARMS]) {
2244 struct nlattr *tbp[NDTPA_MAX+1];
2245 struct neigh_parms *p;
2248 err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2250 nl_ntbl_parm_policy, extack);
2252 goto errout_tbl_lock;
2254 if (tbp[NDTPA_IFINDEX])
2255 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2257 p = lookup_neigh_parms(tbl, net, ifindex);
2260 goto errout_tbl_lock;
/* apply each supplied NDTPA_* attribute to the parms */
2263 for (i = 1; i <= NDTPA_MAX; i++) {
2268 case NDTPA_QUEUE_LEN:
2269 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2270 nla_get_u32(tbp[i]) *
2271 SKB_TRUESIZE(ETH_FRAME_LEN));
2273 case NDTPA_QUEUE_LENBYTES:
2274 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2275 nla_get_u32(tbp[i]));
2277 case NDTPA_PROXY_QLEN:
2278 NEIGH_VAR_SET(p, PROXY_QLEN,
2279 nla_get_u32(tbp[i]));
2281 case NDTPA_APP_PROBES:
2282 NEIGH_VAR_SET(p, APP_PROBES,
2283 nla_get_u32(tbp[i]));
2285 case NDTPA_UCAST_PROBES:
2286 NEIGH_VAR_SET(p, UCAST_PROBES,
2287 nla_get_u32(tbp[i]));
2289 case NDTPA_MCAST_PROBES:
2290 NEIGH_VAR_SET(p, MCAST_PROBES,
2291 nla_get_u32(tbp[i]));
2293 case NDTPA_MCAST_REPROBES:
2294 NEIGH_VAR_SET(p, MCAST_REPROBES,
2295 nla_get_u32(tbp[i]));
2297 case NDTPA_BASE_REACHABLE_TIME:
2298 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2299 nla_get_msecs(tbp[i]));
2300 /* update reachable_time as well, otherwise, the change will
2301 * only be effective after the next time neigh_periodic_work
2302 * decides to recompute it (can be multiple minutes)
2305 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2307 case NDTPA_GC_STALETIME:
2308 NEIGH_VAR_SET(p, GC_STALETIME,
2309 nla_get_msecs(tbp[i]));
2311 case NDTPA_DELAY_PROBE_TIME:
2312 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2313 nla_get_msecs(tbp[i]));
/* notify listeners (e.g. switchdev drivers) of the new probe delay */
2314 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2316 case NDTPA_RETRANS_TIME:
2317 NEIGH_VAR_SET(p, RETRANS_TIME,
2318 nla_get_msecs(tbp[i]));
2320 case NDTPA_ANYCAST_DELAY:
2321 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2322 nla_get_msecs(tbp[i]));
2324 case NDTPA_PROXY_DELAY:
2325 NEIGH_VAR_SET(p, PROXY_DELAY,
2326 nla_get_msecs(tbp[i]));
2328 case NDTPA_LOCKTIME:
2329 NEIGH_VAR_SET(p, LOCKTIME,
2330 nla_get_msecs(tbp[i]));
/* global GC knobs may only be changed from the initial namespace */
2337 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2338 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2339 !net_eq(net, &init_net))
2340 goto errout_tbl_lock;
2342 if (tb[NDTA_THRESH1])
2343 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2345 if (tb[NDTA_THRESH2])
2346 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2348 if (tb[NDTA_THRESH3])
2349 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2351 if (tb[NDTA_GC_INTERVAL])
2352 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2357 write_unlock_bh(&tbl->lock);
/* Strict-mode validation of an RTM_GETNEIGHTBL dump request: header
 * must be a full ndtmsg, pad fields zero, and no trailing attributes.
 */
2362 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2363 struct netlink_ext_ack *extack)
2365 struct ndtmsg *ndtm;
2367 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2368 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2372 ndtm = nlmsg_data(nlh);
2373 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2374 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2378 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2379 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
/* RTM_GETNEIGHTBL dump handler: for each table (optionally filtered
 * by family) emit the table info, then each per-device parms set that
 * belongs to the requesting netns. cb->args[0]/[1] carry the
 * table/parms resume positions across dump continuations.
 */
2386 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2388 const struct nlmsghdr *nlh = cb->nlh;
2389 struct net *net = sock_net(skb->sk);
2390 int family, tidx, nidx = 0;
2391 int tbl_skip = cb->args[0];
2392 int neigh_skip = cb->args[1];
2393 struct neigh_table *tbl;
2395 if (cb->strict_check) {
2396 int err = neightbl_valid_dump_info(nlh, cb->extack);
2402 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2404 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2405 struct neigh_parms *p;
2407 tbl = neigh_tables[tidx];
2411 if (tidx < tbl_skip || (family && tbl->family != family))
2414 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2415 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
/* skip the default parms (first list entry); walk per-device ones */
2420 p = list_next_entry(&tbl->parms, list);
2421 list_for_each_entry_from(p, &tbl->parms_list, list) {
2422 if (!net_eq(neigh_parms_net(p), net))
2425 if (nidx < neigh_skip)
2428 if (neightbl_fill_param_info(skb, tbl, p,
2429 NETLINK_CB(cb->skb).portid,
/* Build an RTM_NEWNEIGH message for one neighbour entry: key (NDA_DST),
 * link-layer address snapshot for valid states, cache timing info,
 * probe count, and optional protocol. The neigh read lock protects the
 * state/address/timestamp snapshot.
 */
2447 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2448 u32 pid, u32 seq, int type, unsigned int flags)
2450 unsigned long now = jiffies;
2451 struct nda_cacheinfo ci;
2452 struct nlmsghdr *nlh;
2455 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2459 ndm = nlmsg_data(nlh);
2460 ndm->ndm_family = neigh->ops->family;
2463 ndm->ndm_flags = neigh->flags;
2464 ndm->ndm_type = neigh->type;
2465 ndm->ndm_ifindex = neigh->dev->ifindex;
2467 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2468 goto nla_put_failure;
2470 read_lock_bh(&neigh->lock);
2471 ndm->ndm_state = neigh->nud_state;
2472 if (neigh->nud_state & NUD_VALID) {
2473 char haddr[MAX_ADDR_LEN];
/* take a consistent copy of ->ha before releasing the lock */
2475 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2476 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2477 read_unlock_bh(&neigh->lock);
2478 goto nla_put_failure;
2482 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2483 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2484 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2485 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2486 read_unlock_bh(&neigh->lock);
2488 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2489 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2490 goto nla_put_failure;
2492 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2493 goto nla_put_failure;
2495 nlmsg_end(skb, nlh);
2499 nlmsg_cancel(skb, nlh);
/* Build an RTM_NEWNEIGH message for a proxy entry: state is NUD_NONE
 * and NTF_PROXY is forced into the flags.
 */
2503 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2504 u32 pid, u32 seq, int type, unsigned int flags,
2505 struct neigh_table *tbl)
2507 struct nlmsghdr *nlh;
2510 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2514 ndm = nlmsg_data(nlh);
2515 ndm->ndm_family = tbl->family;
2518 ndm->ndm_flags = pn->flags | NTF_PROXY;
2519 ndm->ndm_type = RTN_UNICAST;
2520 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2521 ndm->ndm_state = NUD_NONE;
2523 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2524 goto nla_put_failure;
2526 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2527 goto nla_put_failure;
2529 nlmsg_end(skb, nlh);
2533 nlmsg_cancel(skb, nlh);
/* Notify in-kernel netevent listeners and rtnetlink subscribers that a
 * neighbour entry changed.
 */
2537 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2539 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2540 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
/* Dump filter: return true when the entry's device does NOT match the
 * requested master ifindex. master_idx == -1 means "no master".
 */
2543 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2545 struct net_device *master;
2550 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2552 /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
2553 * invalid value for ifindex to denote "no master".
2555 if (master_idx == -1)
2558 if (!master || master->ifindex != master_idx)
/* Dump filter: true when a device filter is set and the entry's device
 * does not match it.
 */
2564 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2566 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2572 struct neigh_dump_filter {
/* Dump the neighbour entries of one table into an RTM dump skb,
 * walking the RCU-protected hash buckets, honouring netns, device and
 * master filters, and resuming from cb->args[1]/[2].
 */
2577 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2578 struct netlink_callback *cb,
2579 struct neigh_dump_filter *filter)
2581 struct net *net = sock_net(skb->sk);
2582 struct neighbour *n;
2583 int rc, h, s_h = cb->args[1];
2584 int idx, s_idx = idx = cb->args[2];
2585 struct neigh_hash_table *nht;
2586 unsigned int flags = NLM_F_MULTI;
2588 if (filter->dev_idx || filter->master_idx)
2589 flags |= NLM_F_DUMP_FILTERED;
2592 nht = rcu_dereference_bh(tbl->nht);
2594 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2597 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2599 n = rcu_dereference_bh(n->next)) {
2600 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2602 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2603 neigh_master_filtered(n->dev, filter->master_idx))
2605 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2618 rcu_read_unlock_bh();
/* Dump the proxy entries of one table, walking phash_buckets under
 * the table read lock with the same filters as neigh_dump_table();
 * resume positions live in cb->args[3]/[4].
 */
2624 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2625 struct netlink_callback *cb,
2626 struct neigh_dump_filter *filter)
2628 struct pneigh_entry *n;
2629 struct net *net = sock_net(skb->sk);
2630 int rc, h, s_h = cb->args[3];
2631 int idx, s_idx = idx = cb->args[4];
2632 unsigned int flags = NLM_F_MULTI;
2634 if (filter->dev_idx || filter->master_idx)
2635 flags |= NLM_F_DUMP_FILTERED;
2637 read_lock_bh(&tbl->lock);
2639 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2642 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2643 if (idx < s_idx || pneigh_net(n) != net)
2645 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2646 neigh_master_filtered(n->dev, filter->master_idx))
2648 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2650 RTM_NEWNEIGH, flags, tbl) < 0) {
2651 read_unlock_bh(&tbl->lock);
2660 read_unlock_bh(&tbl->lock);
/* Validate an RTM_GETNEIGH dump request and extract the device/master
 * filters. Under strict checking only zeroed header fields, NTF_PROXY,
 * and the NDA_IFINDEX / NDA_MASTER attributes are accepted.
 */
2669 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2671 struct neigh_dump_filter *filter,
2672 struct netlink_ext_ack *extack)
2674 struct nlattr *tb[NDA_MAX + 1];
2680 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2681 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2685 ndm = nlmsg_data(nlh);
2686 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2687 ndm->ndm_state || ndm->ndm_type) {
2688 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2692 if (ndm->ndm_flags & ~NTF_PROXY) {
2693 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2697 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2698 tb, NDA_MAX, nda_policy,
2701 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2702 NDA_MAX, nda_policy, extack);
2707 for (i = 0; i <= NDA_MAX; ++i) {
2711 /* all new attributes should require strict_check */
2714 filter->dev_idx = nla_get_u32(tb[i]);
2717 filter->master_idx = nla_get_u32(tb[i]);
2721 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
/* RTM_GETNEIGH dump entry point: validate filters, then iterate all
 * tables (optionally filtered by family), dumping proxy entries when
 * NTF_PROXY was requested and regular neighbours otherwise.
 * cb->args[0] tracks the table resume position.
 */
2730 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2732 const struct nlmsghdr *nlh = cb->nlh;
2733 struct neigh_dump_filter filter = {};
2734 struct neigh_table *tbl;
2739 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2741 /* check for full ndmsg structure presence, family member is
2742 * the same for both structures
2744 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2745 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2748 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2749 if (err < 0 && cb->strict_check)
2754 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2755 tbl = neigh_tables[t];
2759 if (t < s_t || (family && tbl->family != family))
/* reset the per-table resume state when moving to a new table */
2762 memset(&cb->args[1], 0, sizeof(cb->args) -
2763 sizeof(cb->args[0]));
2765 err = pneigh_dump_table(tbl, skb, cb, &filter);
2767 err = neigh_dump_table(tbl, skb, cb, &filter);
/* Validate an RTM_GETNEIGH (non-dump) request and extract the table,
 * destination key, device index and flags for the caller.
 */
2776 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2777 struct neigh_table **tbl,
2778 void **dst, int *dev_idx, u8 *ndm_flags,
2779 struct netlink_ext_ack *extack)
2781 struct nlattr *tb[NDA_MAX + 1];
2785 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2786 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2790 ndm = nlmsg_data(nlh);
2791 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2793 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2797 if (ndm->ndm_flags & ~NTF_PROXY) {
2798 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2802 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2803 NDA_MAX, nda_policy, extack);
2807 *ndm_flags = ndm->ndm_flags;
2808 *dev_idx = ndm->ndm_ifindex;
2809 *tbl = neigh_find_table(ndm->ndm_family);
2811 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2812 return -EAFNOSUPPORT;
2815 for (i = 0; i <= NDA_MAX; ++i) {
/* NDA_DST must exactly match the table's key length */
2821 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2822 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2825 *dst = nla_data(tb[i]);
2828 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
/* Worst-case payload size of an RTM_NEWNEIGH message for a regular
 * neighbour entry, used to size reply skbs.
 */
2836 static inline size_t neigh_nlmsg_size(void)
2838 return NLMSG_ALIGN(sizeof(struct ndmsg))
2839 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2840 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2841 + nla_total_size(sizeof(struct nda_cacheinfo))
2842 + nla_total_size(4) /* NDA_PROBES */
2843 + nla_total_size(1); /* NDA_PROTOCOL */
/* Allocate a reply skb, fill it with the neighbour's RTM_NEWNEIGH
 * info, and unicast it back to the requester.
 */
2846 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2849 struct sk_buff *skb;
2852 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2856 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2862 err = rtnl_unicast(skb, net, pid);
/* Worst-case payload size of an RTM_NEWNEIGH message for a proxy
 * entry (no lladdr/cacheinfo/probes attributes).
 */
2867 static inline size_t pneigh_nlmsg_size(void)
2869 return NLMSG_ALIGN(sizeof(struct ndmsg))
2870 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2871 + nla_total_size(1); /* NDA_PROTOCOL */
/* Proxy-entry counterpart of neigh_get_reply(): fill an RTM_NEWNEIGH
 * message for @neigh (a pneigh_entry in @tbl) and unicast it to @pid.
 */
2874 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2875 u32 pid, u32 seq, struct neigh_table *tbl)
2877 struct sk_buff *skb;
2880 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2884 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2890 err = rtnl_unicast(skb, net, pid);
/* RTM_GETNEIGH "doit" handler: validate the request, look up the
 * (proxy) neighbour for the given destination/device, and unicast the
 * matching entry back to the requester.  All failure paths set extack.
 */
2895 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2896 struct netlink_ext_ack *extack)
2898 struct net *net = sock_net(in_skb->sk);
2899 struct net_device *dev = NULL;
2900 struct neigh_table *tbl = NULL;
2901 struct neighbour *neigh;
2907 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2913 dev = __dev_get_by_index(net, dev_idx);
2915 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2921 NL_SET_ERR_MSG(extack, "Network address not specified");
/* NTF_PROXY requests are answered from the proxy (pneigh) table;
 * pneigh_lookup() with creat==0 only searches, never inserts.
 */
2925 if (ndm_flags & NTF_PROXY) {
2926 struct pneigh_entry *pn;
2928 pn = pneigh_lookup(tbl, net, dst, dev, 0);
2930 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
2933 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
2934 nlh->nlmsg_seq, tbl);
2938 NL_SET_ERR_MSG(extack, "No device specified");
/* Regular lookup takes a reference; release it after replying. */
2942 neigh = neigh_lookup(tbl, dst, dev);
2944 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
2948 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
2951 neigh_release(neigh);
/* Invoke @cb(neighbour, @cookie) for every entry in @tbl.
 * Runs under RCU-BH plus tbl->lock held for read, so the hash table
 * cannot be resized while iterating; @cb must therefore not sleep or
 * re-take tbl->lock.
 */
2956 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2959 struct neigh_hash_table *nht;
2962 nht = rcu_dereference_bh(tbl->nht);
2964 read_lock(&tbl->lock); /* avoid resizes */
2965 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2966 struct neighbour *n;
2968 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2970 n = rcu_dereference_bh(n->next))
2973 read_unlock(&tbl->lock);
2974 rcu_read_unlock_bh();
2976 EXPORT_SYMBOL(neigh_for_each);
2978 /* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every hash chain and, for entries selected by @cb (the decision
 * branch is outside this extract — presumably cb(n) != 0; verify against
 * the full source), unlink the entry from its bucket under n->lock and
 * drop it via neigh_cleanup_and_release().  The splice of *np to
 * n->next is done with rcu_assign_pointer so concurrent RCU readers
 * stay safe.
 */
2979 void __neigh_for_each_release(struct neigh_table *tbl,
2980 int (*cb)(struct neighbour *))
2983 struct neigh_hash_table *nht;
2985 nht = rcu_dereference_protected(tbl->nht,
2986 lockdep_is_held(&tbl->lock));
2987 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2988 struct neighbour *n;
2989 struct neighbour __rcu **np;
2991 np = &nht->hash_buckets[chain];
2992 while ((n = rcu_dereference_protected(*np,
2993 lockdep_is_held(&tbl->lock))) != NULL) {
2996 write_lock(&n->lock);
2999 rcu_assign_pointer(*np,
3000 rcu_dereference_protected(n->next,
3001 lockdep_is_held(&tbl->lock)));
3005 write_unlock(&n->lock);
3007 neigh_cleanup_and_release(n);
3011 EXPORT_SYMBOL(__neigh_for_each_release);
/* Transmit @skb to @addr on @dev using address family table @index.
 * For resolving tables (e.g. ARP) the neighbour is looked up (fast
 * no-ref IPv4 path for NEIGH_ARP_TABLE) and created on demand, then
 * neigh->output() is invoked under RCU-BH.  NEIGH_LINK_TABLE bypasses
 * resolution: the link-layer header is built directly and the skb is
 * queued with dev_queue_xmit().  Returns -EAFNOSUPPORT for an unknown
 * table index, otherwise the output/xmit result or PTR_ERR from
 * __neigh_create().
 */
3013 int neigh_xmit(int index, struct net_device *dev,
3014 const void *addr, struct sk_buff *skb)
3016 int err = -EAFNOSUPPORT;
3017 if (likely(index < NEIGH_NR_TABLES)) {
3018 struct neigh_table *tbl;
3019 struct neighbour *neigh;
3021 tbl = neigh_tables[index];
/* IPv4 has a dedicated lockless lookup keyed on the 32-bit address. */
3025 if (index == NEIGH_ARP_TABLE) {
3026 u32 key = *((u32 *)addr);
3028 neigh = __ipv4_neigh_lookup_noref(dev, key);
3030 neigh = __neigh_lookup_noref(tbl, addr, dev);
3033 neigh = __neigh_create(tbl, addr, dev, false);
3034 err = PTR_ERR(neigh);
3035 if (IS_ERR(neigh)) {
3036 rcu_read_unlock_bh();
3039 err = neigh->output(neigh, skb);
3040 rcu_read_unlock_bh();
3042 else if (index == NEIGH_LINK_TABLE) {
3043 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3044 addr, NULL, skb->len);
3047 err = dev_queue_xmit(skb);
3055 EXPORT_SYMBOL(neigh_xmit);
3057 #ifdef CONFIG_PROC_FS
/* seq_file helper: return the first neighbour of the iteration, walking
 * the hash buckets in order.  Entries from other network namespaces are
 * skipped, a per-protocol sub-iterator is honoured if set, and
 * NUD_NOARP-only entries are filtered when NEIGH_SEQ_SKIP_NOARP is
 * requested (the skip branch itself is outside this extract).
 * Records the bucket reached in state->bucket.
 */
3059 static struct neighbour *neigh_get_first(struct seq_file *seq)
3061 struct neigh_seq_state *state = seq->private;
3062 struct net *net = seq_file_net(seq);
3063 struct neigh_hash_table *nht = state->nht;
3064 struct neighbour *n = NULL;
/* Starting (or restarting) on the neighbour side of the walk. */
3067 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3068 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3069 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3072 if (!net_eq(dev_net(n->dev), net))
3074 if (state->neigh_sub_iter) {
3078 v = state->neigh_sub_iter(state, n, &fakep);
3082 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3084 if (n->nud_state & ~NUD_NOARP)
3087 n = rcu_dereference_bh(n->next);
3093 state->bucket = bucket;
/* seq_file helper: advance from neighbour @n to the next entry that
 * passes the same netns / sub-iterator / NOARP filters as
 * neigh_get_first(), moving on to the next hash bucket when the current
 * chain is exhausted.  @pos, when non-NULL, is presumably decremented
 * per step by the (unseen) tail of this function — verify against the
 * full source.
 */
3098 static struct neighbour *neigh_get_next(struct seq_file *seq,
3099 struct neighbour *n,
3102 struct neigh_seq_state *state = seq->private;
3103 struct net *net = seq_file_net(seq);
3104 struct neigh_hash_table *nht = state->nht;
3106 if (state->neigh_sub_iter) {
3107 void *v = state->neigh_sub_iter(state, n, pos);
3111 n = rcu_dereference_bh(n->next);
3115 if (!net_eq(dev_net(n->dev), net))
3117 if (state->neigh_sub_iter) {
3118 void *v = state->neigh_sub_iter(state, n, pos);
3123 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3126 if (n->nud_state & ~NUD_NOARP)
3129 n = rcu_dereference_bh(n->next);
/* Chain exhausted: step to the next bucket, stop after the last one. */
3135 if (++state->bucket >= (1 << nht->hash_shift))
3138 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
/* seq_file helper: return the neighbour at offset *pos, by starting at
 * the first entry and stepping forward; returns NULL when *pos is past
 * the end (non-zero remainder after the walk).
 */
3146 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3148 struct neighbour *n = neigh_get_first(seq);
3153 n = neigh_get_next(seq, n, pos);
3158 return *pos ? NULL : n;
/* seq_file helper: first proxy-neighbour entry, scanning the fixed-size
 * phash_buckets array and skipping entries from other netns.  Marks the
 * iteration state as being on the pneigh side and records the bucket.
 */
3161 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3163 struct neigh_seq_state *state = seq->private;
3164 struct net *net = seq_file_net(seq);
3165 struct neigh_table *tbl = state->tbl;
3166 struct pneigh_entry *pn = NULL;
3169 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3170 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3171 pn = tbl->phash_buckets[bucket];
3172 while (pn && !net_eq(pneigh_net(pn), net))
3177 state->bucket = bucket;
/* seq_file helper: advance to the next proxy-neighbour entry in the
 * same netns, moving to the next phash bucket when the current chain
 * runs out; returns NULL after the last bucket (PNEIGH_HASHMASK).
 */
3182 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3183 struct pneigh_entry *pn,
3186 struct neigh_seq_state *state = seq->private;
3187 struct net *net = seq_file_net(seq);
3188 struct neigh_table *tbl = state->tbl;
3192 } while (pn && !net_eq(pneigh_net(pn), net));
3195 if (++state->bucket > PNEIGH_HASHMASK)
3197 pn = tbl->phash_buckets[state->bucket];
3198 while (pn && !net_eq(pneigh_net(pn), net))
/* seq_file helper: proxy-entry counterpart of neigh_get_idx() — walk
 * forward from the first pneigh entry to offset *pos, NULL when *pos is
 * beyond the end.
 */
3210 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3212 struct pneigh_entry *pn = pneigh_get_first(seq);
3217 pn = pneigh_get_next(seq, pn, pos);
3222 return *pos ? NULL : pn;
/* seq_file helper: resolve position *pos across BOTH tables — first the
 * neighbour entries, then (unless NEIGH_SEQ_NEIGH_ONLY) the proxy
 * entries, using a local copy of the position so each half sees the
 * remaining offset.
 */
3225 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3227 struct neigh_seq_state *state = seq->private;
3229 loff_t idxpos = *pos;
3231 rc = neigh_get_idx(seq, &idxpos);
3232 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3233 rc = pneigh_get_idx(seq, &idxpos);
/* Exported seq_file ->start for protocol /proc files (e.g. arp):
 * snapshots tbl->nht under RCU-BH, takes tbl->lock for read (both held
 * until neigh_seq_stop), and returns SEQ_START_TOKEN at position 0 or
 * the entry at *pos otherwise.  @neigh_seq_flags selects filtering;
 * the internal NEIGH_SEQ_IS_PNEIGH bit is always cleared here.
 */
3238 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3239 __acquires(tbl->lock)
3242 struct neigh_seq_state *state = seq->private;
3246 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3249 state->nht = rcu_dereference_bh(tbl->nht);
3250 read_lock(&tbl->lock);
3252 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3254 EXPORT_SYMBOL(neigh_seq_start);
/* Exported seq_file ->next: after SEQ_START_TOKEN yield the first
 * neighbour; while on the neighbour side advance with neigh_get_next()
 * and fall through to the first pneigh entry when neighbours are
 * exhausted (unless NEIGH_SEQ_NEIGH_ONLY); on the pneigh side advance
 * with pneigh_get_next().  The BUG_ON guards against reaching the
 * pneigh side when the caller asked for neighbours only.
 */
3256 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3258 struct neigh_seq_state *state;
3261 if (v == SEQ_START_TOKEN) {
3262 rc = neigh_get_first(seq);
3266 state = seq->private;
3267 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3268 rc = neigh_get_next(seq, v, NULL);
3271 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3272 rc = pneigh_get_first(seq);
3274 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3275 rc = pneigh_get_next(seq, v, NULL);
3281 EXPORT_SYMBOL(neigh_seq_next);
/* Exported seq_file ->stop: releases the locks taken in
 * neigh_seq_start() — tbl->lock (read) then RCU-BH — in reverse order.
 */
3283 void neigh_seq_stop(struct seq_file *seq, void *v)
3284 __releases(tbl->lock)
3287 struct neigh_seq_state *state = seq->private;
3288 struct neigh_table *tbl = state->tbl;
3290 read_unlock(&tbl->lock);
3291 rcu_read_unlock_bh();
3293 EXPORT_SYMBOL(neigh_seq_stop);
3295 /* statistics via seq_file */
/* seq_file ->start for the per-table stats file: position 0 yields
 * SEQ_START_TOKEN (the header line); positions >= 1 map to the
 * (*pos - 1)-th possible CPU's per-cpu statistics block.
 */
3297 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3299 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3303 return SEQ_START_TOKEN;
3305 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3306 if (!cpu_possible(cpu))
3309 return per_cpu_ptr(tbl->stats, cpu);
/* seq_file ->next for the stats file: scan forward from *pos to the
 * next possible CPU and return its per-cpu stats block; NULL past the
 * last CPU.
 */
3314 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3316 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3319 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3320 if (!cpu_possible(cpu))
3323 return per_cpu_ptr(tbl->stats, cpu);
/* seq_file ->stop for the stats file.  Body not visible in this
 * extract; presumably a no-op since ->start takes no locks — verify
 * against the full source.
 */
3329 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
/* seq_file ->show for the stats file: SEQ_START_TOKEN prints the column
 * header, otherwise one line of hex-formatted counters for the per-cpu
 * stats block @v (entries count is table-global via atomic_read).
 * Note: seq_printf arguments for several columns fall outside this
 * extract; the visible ones follow the header order.
 */
3334 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3336 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3337 struct neigh_statistics *st = v;
3339 if (v == SEQ_START_TOKEN) {
3340 seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3344 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3345 "%08lx %08lx %08lx "
3346 "%08lx %08lx %08lx\n",
3347 atomic_read(&tbl->entries),
3358 st->rcv_probes_mcast,
3359 st->rcv_probes_ucast,
3361 st->periodic_gc_runs,
/* seq_operations wiring for /proc/net/stat/<table> (one record per
 * possible CPU, plus a header line).
 */
3370 static const struct seq_operations neigh_stat_seq_ops = {
3371 .start = neigh_stat_seq_start,
3372 .next = neigh_stat_seq_next,
3373 .stop = neigh_stat_seq_stop,
3374 .show = neigh_stat_seq_show,
3376 #endif /* CONFIG_PROC_FS */
/* Multicast a neighbour event of @type to the RTNLGRP_NEIGH group.
 * Uses GFP_ATOMIC since callers may hold locks / run in softirq
 * context.  -EMSGSIZE from neigh_fill_info() would mean
 * neigh_nlmsg_size() under-estimated — hence the WARN_ON.  On failure
 * the error is recorded on the group via rtnl_set_sk_err().
 */
3378 static void __neigh_notify(struct neighbour *n, int type, int flags,
3381 struct net *net = dev_net(n->dev);
3382 struct sk_buff *skb;
3385 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3389 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3391 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3392 WARN_ON(err == -EMSGSIZE);
3396 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3400 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
/* Ask userspace (e.g. an arp daemon) to resolve @n: emit an
 * RTM_GETNEIGH request-flagged notification with pid 0.
 */
3403 void neigh_app_ns(struct neighbour *n)
3405 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3407 EXPORT_SYMBOL(neigh_app_ns);
3409 #ifdef CONFIG_SYSCTL
/* Upper bound for the legacy packet-count "unres_qlen" sysctl, chosen so
 * that converting back to bytes cannot overflow an int.
 */
3410 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
/* sysctl handler bridging the legacy packet-count unres_qlen knob to
 * the byte-based storage: reads convert bytes -> packets, writes clamp
 * to [0, unres_qlen_max] and convert packets -> bytes, using
 * SKB_TRUESIZE(ETH_FRAME_LEN) as the per-packet estimate.
 */
3412 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3413 void *buffer, size_t *lenp, loff_t *ppos)
3416 struct ctl_table tmp = *ctl;
3418 tmp.extra1 = SYSCTL_ZERO;
3419 tmp.extra2 = &unres_qlen_max;
3422 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3423 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3426 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
/* Return @dev's per-device neigh_parms for the given address family
 * (ARP for IPv4, ND for IPv6) under RCU; NULL presumably for other
 * families (fall-through not visible here).
 */
3430 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3435 return __in_dev_arp_parms_get_rcu(dev);
3437 return __in6_dev_nd_parms_get_rcu(dev);
/* Propagate a change of a "default" parms value to every device in
 * @net for the same family — but only to devices that have NOT locally
 * overridden that value (tracked per-index in data_state bits).
 * Walks the device list under RCU.
 */
3442 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3445 struct net_device *dev;
3446 int family = neigh_parms_family(p);
3449 for_each_netdev_rcu(net, dev) {
3450 struct neigh_parms *dst_p =
3451 neigh_get_dev_parms_rcu(dev, family);
3453 if (dst_p && !test_bit(index, dst_p->data_state))
3454 dst_p->data[index] = p->data[index];
/* Common post-write hook for all neigh sysctl handlers: marks the
 * written value as locally set (data_state bit, computed from the
 * data-pointer offset into p->data), fires the netevent notifier for
 * DELAY_PROBE_TIME changes, and — when this is the family default
 * (ctl->extra1 dev == NULL) — copies the new value to all devices that
 * haven't overridden it.
 */
3459 static void neigh_proc_update(struct ctl_table *ctl, int write)
3461 struct net_device *dev = ctl->extra1;
3462 struct neigh_parms *p = ctl->extra2;
3463 struct net *net = neigh_parms_net(p);
3464 int index = (int *) ctl->data - p->data;
3469 set_bit(index, p->data_state);
3470 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3471 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3472 if (!dev) /* NULL dev means this is default value */
3473 neigh_copy_dflt_parms(net, p, index);
/* proc_dointvec_minmax clamped to [0, INT_MAX] (via a stack copy of the
 * ctl_table so extra1/extra2 — which this file reuses for dev/parms —
 * are not disturbed), followed by the common neigh_proc_update() hook.
 */
3476 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3477 void *buffer, size_t *lenp,
3480 struct ctl_table tmp = *ctl;
3483 tmp.extra1 = SYSCTL_ZERO;
3484 tmp.extra2 = SYSCTL_INT_MAX;
3486 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3487 neigh_proc_update(ctl, write);
/* Plain proc_dointvec plus the common neigh_proc_update() hook. */
3491 int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
3492 size_t *lenp, loff_t *ppos)
3494 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3496 neigh_proc_update(ctl, write);
3499 EXPORT_SYMBOL(neigh_proc_dointvec);
/* Seconds<->jiffies variant of the wrapper, plus neigh_proc_update(). */
3501 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
3502 size_t *lenp, loff_t *ppos)
3504 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3506 neigh_proc_update(ctl, write);
3509 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
/* USER_HZ<->jiffies variant of the wrapper, plus neigh_proc_update(). */
3511 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3512 void *buffer, size_t *lenp,
3515 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3517 neigh_proc_update(ctl, write);
/* Milliseconds<->jiffies variant of the wrapper, plus
 * neigh_proc_update().
 */
3521 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3522 void *buffer, size_t *lenp, loff_t *ppos)
3524 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3526 neigh_proc_update(ctl, write);
3529 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
/* Legacy unres_qlen (packet-count) handler: delegate the byte<->packet
 * conversion to proc_unres_qlen(), then run neigh_proc_update().
 */
3531 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3532 void *buffer, size_t *lenp,
3535 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3537 neigh_proc_update(ctl, write);
/* Handler shared by base_reachable_time (seconds) and
 * base_reachable_time_ms (milliseconds): dispatch on procname to the
 * matching jiffies conversion, then on a successful write re-randomize
 * p->reachable_time immediately instead of waiting for
 * neigh_periodic_work's next recomputation.
 */
3541 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3542 void *buffer, size_t *lenp,
3545 struct neigh_parms *p = ctl->extra2;
3548 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3549 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3550 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3551 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3555 if (write && ret == 0) {
3556 /* update reachable_time as well, otherwise, the change will
3557 * only be effective after the next time neigh_periodic_work
3558 * decides to recompute it
3561 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* offsetof-style pointer into neigh_parms::data for slot @index;
 * evaluated against a NULL base here and fixed up at registration time
 * by adding the real parms pointer (see neigh_sysctl_register).
 */
3566 #define NEIGH_PARMS_DATA_OFFSET(index) \
3567 (&((struct neigh_parms *) 0)->data[index])
/* Build one ctl_table entry for NEIGH_VAR_<attr>; @data_attr allows two
 * sysctl names (e.g. unres_qlen and unres_qlen_bytes) to share one
 * underlying data slot.
 */
3569 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3570 [NEIGH_VAR_ ## attr] = { \
3572 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3573 .maxlen = sizeof(int), \
3575 .proc_handler = proc, \
/* Convenience wrappers selecting the matching proc handler. */
3578 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3579 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3581 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3582 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3584 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3585 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
/* "REUSED" variants alias a second sysctl name onto another slot. */
3587 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3588 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3590 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3591 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
/* Template sysctl table, kmemdup'ed per parms in
 * neigh_sysctl_register().  Entries up to NEIGH_VAR_GC_INTERVAL are
 * per-parms knobs; GC_* entries are table-global and only kept for the
 * "default" registration.
 */
3593 static struct neigh_sysctl_table {
3594 struct ctl_table_header *sysctl_header;
3595 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3596 } neigh_sysctl_template __read_mostly = {
3598 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3599 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3600 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3601 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3602 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3603 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3604 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3605 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3606 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3607 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3608 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3609 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3610 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
/* Legacy aliases sharing storage with the entries above. */
3611 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3612 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3613 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
/* Table-global GC knobs; .data is patched in for the default parms. */
3614 [NEIGH_VAR_GC_INTERVAL] = {
3615 .procname = "gc_interval",
3616 .maxlen = sizeof(int),
3618 .proc_handler = proc_dointvec_jiffies,
3620 [NEIGH_VAR_GC_THRESH1] = {
3621 .procname = "gc_thresh1",
3622 .maxlen = sizeof(int),
3624 .extra1 = SYSCTL_ZERO,
3625 .extra2 = SYSCTL_INT_MAX,
3626 .proc_handler = proc_dointvec_minmax,
3628 [NEIGH_VAR_GC_THRESH2] = {
3629 .procname = "gc_thresh2",
3630 .maxlen = sizeof(int),
3632 .extra1 = SYSCTL_ZERO,
3633 .extra2 = SYSCTL_INT_MAX,
3634 .proc_handler = proc_dointvec_minmax,
3636 [NEIGH_VAR_GC_THRESH3] = {
3637 .procname = "gc_thresh3",
3638 .maxlen = sizeof(int),
3640 .extra1 = SYSCTL_ZERO,
3641 .extra2 = SYSCTL_INT_MAX,
3642 .proc_handler = proc_dointvec_minmax,
/* Register the per-parms sysctl tree under
 * net/<family>/neigh/<dev|default>/.  Duplicates the template, rebases
 * each per-parms entry's data pointer onto @p and stashes dev/parms in
 * extra1/extra2 for neigh_proc_update().  For a device registration the
 * table is truncated before the GC entries; for the default
 * registration the GC entries are pointed at the table-global fields.
 * A caller-supplied @handler overrides the time-related entries, except
 * base_reachable_time(_ms) which always get
 * neigh_proc_base_reachable_time so reachable_time is refreshed
 * immediately on write.
 */
3648 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3649 proc_handler *handler)
3652 struct neigh_sysctl_table *t;
3653 const char *dev_name_source;
3654 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3657 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
/* Rebase template offsets (NEIGH_PARMS_DATA_OFFSET was computed
 * against a NULL parms pointer) onto the real @p.
 */
3661 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3662 t->neigh_vars[i].data += (long) p;
3663 t->neigh_vars[i].extra1 = dev;
3664 t->neigh_vars[i].extra2 = p;
3668 dev_name_source = dev->name;
3669 /* Terminate the table early */
3670 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3671 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3673 struct neigh_table *tbl = p->tbl;
3674 dev_name_source = "default";
3675 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3676 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3677 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3678 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3683 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3685 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3686 /* RetransTime (in milliseconds)*/
3687 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3688 /* ReachableTime (in milliseconds) */
3689 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3691 /* Those handlers will update p->reachable_time after
3692 * base_reachable_time(_ms) is set to ensure the new timer starts being
3693 * applied after the next neighbour update instead of waiting for
3694 * neigh_periodic_work to update its value (can be multiple minutes)
3695 * So any handler that replaces them should do this as well
3698 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3699 neigh_proc_base_reachable_time;
3700 /* ReachableTime (in milliseconds) */
3701 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3702 neigh_proc_base_reachable_time;
3705 /* Don't export sysctls to unprivileged users */
3706 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3707 t->neigh_vars[0].procname = NULL;
3709 switch (neigh_parms_family(p)) {
3720 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3721 p_name, dev_name_source);
3723 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3724 if (!t->sysctl_header)
3727 p->sysctl_table = t;
3735 EXPORT_SYMBOL(neigh_sysctl_register);
/* Tear down what neigh_sysctl_register() created: detach the table
 * from @p before unregistering (the kfree of @t is outside this
 * extract).  Safe to call when no table was registered.
 */
3737 void neigh_sysctl_unregister(struct neigh_parms *p)
3739 if (p->sysctl_table) {
3740 struct neigh_sysctl_table *t = p->sysctl_table;
3741 p->sysctl_table = NULL;
3742 unregister_net_sysctl_table(t->sysctl_header);
3746 EXPORT_SYMBOL(neigh_sysctl_unregister);
3748 #endif /* CONFIG_SYSCTL */
/* Register the PF_UNSPEC rtnetlink handlers for neighbour and
 * neighbour-table messages (new/del/get neigh, get/set neightbl).
 */
3750 static int __init neigh_init(void)
3752 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3753 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3754 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3756 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3758 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3763 subsys_initcall(neigh_init);