1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Generic address resolution entity
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
10 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
11 * Harald Welte Add neighbour cache statistics like rtstat
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 #include <linux/slab.h>
17 #include <linux/kmemleak.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
25 #include <linux/sysctl.h>
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
42 #include <trace/events/neigh.h>
/* Debug printout helper: compiled in only up to NEIGH_DEBUG verbosity. */
45 #define neigh_dbg(level, fmt, ...) \
47 if (level <= NEIGH_DEBUG) \
48 pr_debug(fmt, ##__VA_ARGS__); \
/* Proxy-neighbour hash: PNEIGH_HASHMASK + 1 == 16 buckets. */
51 #define PNEIGH_HASHMASK 0xF
/* Forward declarations for functions defined later in this file. */
53 static void neigh_timer_handler(struct timer_list *t);
54 static void __neigh_notify(struct neighbour *n, int type, int flags,
56 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
57 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
58 struct net_device *dev);
61 static const struct seq_operations neigh_stat_seq_ops;
65 Neighbour hash table buckets are protected with rwlock tbl->lock.
67 - All the scans/updates to hash buckets MUST be made under this lock.
68 - NOTHING clever should be made under this lock: no callbacks
69 to protocol backends, no attempts to send something to network.
70 It will result in deadlocks, if backend/driver wants to use neighbour
72 - If the entry requires some non-trivial actions, increase
73 its reference count and release table lock.
75 Neighbour entries are protected:
76 - with reference count.
77 - with rwlock neigh->lock
79 Reference count prevents destruction.
81 neigh->lock mainly serializes ll address data and its validity state.
82 However, the same lock is used to protect another entry fields:
86 Again, nothing clever shall be made under neigh->lock,
87 the most complicated procedure, which we allow is dev->hard_header.
88 It is supposed, that dev->hard_header is simplistic and does
89 not make callbacks to neighbour tables.
/* Output stub installed on dead/stray entries so queued packets are
 * discarded instead of transmitted.  NOTE(review): body is elided in
 * this dump — presumably frees the skb and returns an error; confirm
 * against the full source. */
92 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
/* Final teardown of an entry removed from the table: emit an
 * RTM_DELNEIGH netlink notification, fire the netevent chain, then
 * drop the reference (which may free the entry). */
98 static void neigh_cleanup_and_release(struct neighbour *neigh)
100 trace_neigh_cleanup_and_release(neigh, 0);
101 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
102 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
103 neigh_release(neigh);
107 * It is random distribution in the interval (1/2)*base...(3/2)*base.
108 * It corresponds to default IPv6 settings and is not overridable,
109 * because it is really reasonable choice.
/* Returns a value uniformly distributed in [base/2, 3*base/2), or 0
 * when base is 0 (see the distribution comment above). */
112 unsigned long neigh_rand_reach_time(unsigned long base)
114 return base ? (prandom_u32() % base) + (base >> 1) : 0;
116 EXPORT_SYMBOL(neigh_rand_reach_time);
/* Detach @n from the table's gc and managed lists as part of killing
 * it; gc_entries accounting is decremented to match.  NOTE(review):
 * the dead-flag assignment itself is in elided lines; the lists are
 * presumably protected by tbl->lock per the header comment — confirm.
 */
118 static void neigh_mark_dead(struct neighbour *n)
121 if (!list_empty(&n->gc_list)) {
122 list_del_init(&n->gc_list);
123 atomic_dec(&n->tbl->gc_entries);
125 if (!list_empty(&n->managed_list))
126 list_del_init(&n->managed_list);
/* Re-evaluate @n's membership on its table's gc list after a state or
 * flags change: PERMANENT or externally-learned entries are exempt
 * from garbage collection; everything else must be on the list. */
129 static void neigh_update_gc_list(struct neighbour *n)
131 bool on_gc_list, exempt_from_gc;
133 write_lock_bh(&n->tbl->lock);
134 write_lock(&n->lock);
138 /* remove from the gc list if new state is permanent or if neighbor
139 * is externally learned; otherwise entry should be on the gc list
141 exempt_from_gc = n->nud_state & NUD_PERMANENT ||
142 n->flags & NTF_EXT_LEARNED;
143 on_gc_list = !list_empty(&n->gc_list);
145 if (exempt_from_gc && on_gc_list) {
146 list_del_init(&n->gc_list);
147 atomic_dec(&n->tbl->gc_entries);
148 } else if (!exempt_from_gc && !on_gc_list) {
149 /* add entries to the tail; cleaning removes from the front */
150 list_add_tail(&n->gc_list, &n->tbl->gc_list);
151 atomic_inc(&n->tbl->gc_entries);
154 write_unlock(&n->lock);
155 write_unlock_bh(&n->tbl->lock);
/* Keep @n's membership on the table's managed list in sync with its
 * NTF_MANAGED flag; mirrors neigh_update_gc_list() but without
 * gc_entries accounting. */
158 static void neigh_update_managed_list(struct neighbour *n)
160 bool on_managed_list, add_to_managed;
162 write_lock_bh(&n->tbl->lock);
163 write_lock(&n->lock);
167 add_to_managed = n->flags & NTF_MANAGED;
168 on_managed_list = !list_empty(&n->managed_list);
170 if (!add_to_managed && on_managed_list)
171 list_del_init(&n->managed_list);
172 else if (add_to_managed && !on_managed_list)
173 list_add_tail(&n->managed_list, &n->tbl->managed_list);
175 write_unlock(&n->lock);
176 write_unlock_bh(&n->tbl->lock);
/* Translate NEIGH_UPDATE_F_EXT_LEARNED / _MANAGED request flags into
 * NTF_* entry flags, recording what changed via the out parameters.
 * Admin-only: bails out (elided) unless NEIGH_UPDATE_F_ADMIN is set.
 * NOTE(review): the *notify and *gc_update assignments appear to be
 * in elided lines — confirm against the full source. */
179 static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
180 bool *gc_update, bool *managed_update)
182 u32 ndm_flags, old_flags = neigh->flags;
184 if (!(flags & NEIGH_UPDATE_F_ADMIN))
187 ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
188 ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
190 if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
191 if (ndm_flags & NTF_EXT_LEARNED)
192 neigh->flags |= NTF_EXT_LEARNED;
194 neigh->flags &= ~NTF_EXT_LEARNED;
198 if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
199 if (ndm_flags & NTF_MANAGED)
200 neigh->flags |= NTF_MANAGED;
202 neigh->flags &= ~NTF_MANAGED;
204 *managed_update = true;
/* Unlink @n from its hash chain (via slot pointer @np) and release it,
 * but only if nobody else holds a reference (refcnt == 1).  Caller
 * holds tbl->lock; returns whether the entry was deleted (return
 * statements elided in this dump). */
208 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
209 struct neigh_table *tbl)
213 write_lock(&n->lock);
214 if (refcount_read(&n->refcnt) == 1) {
215 struct neighbour *neigh;
217 neigh = rcu_dereference_protected(n->next,
218 lockdep_is_held(&tbl->lock));
219 rcu_assign_pointer(*np, neigh);
223 write_unlock(&n->lock);
225 neigh_cleanup_and_release(n);
/* Locate @ndel's hash chain slot and try to delete it via neigh_del().
 * Caller holds tbl->lock (enforced by the lockdep annotations). */
229 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
231 struct neigh_hash_table *nht;
232 void *pkey = ndel->primary_key;
235 struct neighbour __rcu **np;
237 nht = rcu_dereference_protected(tbl->nht,
238 lockdep_is_held(&tbl->lock));
239 hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
240 hash_val = hash_val >> (32 - nht->hash_shift);
242 np = &nht->hash_buckets[hash_val];
243 while ((n = rcu_dereference_protected(*np,
244 lockdep_is_held(&tbl->lock)))) {
246 return neigh_del(n, np, tbl);
/* Synchronous garbage collection, invoked from the allocation path
 * when the table is over its thresholds.  Walks the gc list from the
 * front (new entries are appended at the tail) and removes
 * unreferenced entries that are FAILED, NOARP, multicast, or idle for
 * more than 5 seconds, stopping once enough have been shrunk to get
 * back under gc_thresh2. */
252 static int neigh_forced_gc(struct neigh_table *tbl)
254 int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
255 unsigned long tref = jiffies - 5 * HZ;
256 struct neighbour *n, *tmp;
259 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
261 write_lock_bh(&tbl->lock);
263 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
264 if (refcount_read(&n->refcnt) == 1) {
267 write_lock(&n->lock);
268 if ((n->nud_state == NUD_FAILED) ||
269 (n->nud_state == NUD_NOARP) ||
270 (tbl->is_multicast &&
271 tbl->is_multicast(n->primary_key)) ||
272 time_after(tref, n->updated))
274 write_unlock(&n->lock);
276 if (remove && neigh_remove_one(n, tbl))
278 if (shrunk >= max_clean)
283 tbl->last_flush = jiffies;
285 write_unlock_bh(&tbl->lock);
/* Arm the entry's state-machine timer for @when.  A timer already
 * pending here indicates a state/refcount bug, hence the loud printk.
 * (The neigh_hold() pairing is in elided lines.) */
290 static void neigh_add_timer(struct neighbour *n, unsigned long when)
293 if (unlikely(mod_timer(&n->timer, when))) {
294 printk("NEIGH: BUG, double timer add, state is %x\n",
/* Cancel a pending state-machine timer; only entries in an IN_TIMER
 * state can have one.  (The neigh_release() for the timer's reference
 * and the return values are in elided lines.) */
300 static int neigh_del_timer(struct neighbour *n)
302 if ((n->nud_state & NUD_IN_TIMER) &&
303 del_timer(&n->timer)) {
/* Drain and free every skb queued on the proxy delay queue @list. */
310 static void pneigh_queue_purge(struct sk_buff_head *list)
314 while ((skb = skb_dequeue(list)) != NULL) {
/* Remove all hash-table entries bound to @dev (or every entry when
 * @dev is NULL), optionally retaining NUD_PERMANENT ones when
 * @skip_perm.  Entries still referenced elsewhere cannot be freed, so
 * they are neutered instead: queue purged, output redirected to
 * neigh_blackhole, state downgraded.  Caller holds tbl->lock. */
320 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
324 struct neigh_hash_table *nht;
326 nht = rcu_dereference_protected(tbl->nht,
327 lockdep_is_held(&tbl->lock));
329 for (i = 0; i < (1 << nht->hash_shift); i++) {
331 struct neighbour __rcu **np = &nht->hash_buckets[i];
333 while ((n = rcu_dereference_protected(*np,
334 lockdep_is_held(&tbl->lock))) != NULL) {
335 if (dev && n->dev != dev) {
339 if (skip_perm && n->nud_state & NUD_PERMANENT) {
343 rcu_assign_pointer(*np,
344 rcu_dereference_protected(n->next,
345 lockdep_is_held(&tbl->lock)));
346 write_lock(&n->lock);
349 if (refcount_read(&n->refcnt) != 1) {
350 /* The most unpleasant situation.
351 We must destroy neighbour entry,
352 but someone still uses it.
354 The destroy will be delayed until
355 the last user releases us, but
356 we must kill timers etc. and move
359 __skb_queue_purge(&n->arp_queue);
360 n->arp_queue_len_bytes = 0;
361 n->output = neigh_blackhole;
362 if (n->nud_state & NUD_VALID)
363 n->nud_state = NUD_NOARP;
365 n->nud_state = NUD_NONE;
366 neigh_dbg(2, "neigh %p is stray\n", n);
368 write_unlock(&n->lock);
369 neigh_cleanup_and_release(n);
/* Flush all of @dev's neighbour entries, e.g. after a link-layer
 * address change; permanent entries are flushed too (skip_perm=false).
 */
374 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
376 write_lock_bh(&tbl->lock);
377 neigh_flush_dev(tbl, dev, false);
378 write_unlock_bh(&tbl->lock);
380 EXPORT_SYMBOL(neigh_changeaddr);
/* Common device-down helper: flush @dev's hash entries (honouring
 * @skip_perm), drop its proxy entries — pneigh_ifdown_and_unlock()
 * releases tbl->lock — then quiesce the proxy timer and queue.
 * (Return statement elided.) */
382 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
385 write_lock_bh(&tbl->lock);
386 neigh_flush_dev(tbl, dev, skip_perm);
387 pneigh_ifdown_and_unlock(tbl, dev);
389 del_timer_sync(&tbl->proxy_timer);
390 pneigh_queue_purge(&tbl->proxy_queue);
/* Carrier loss: flush @dev's entries but keep NUD_PERMANENT ones. */
394 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
396 __neigh_ifdown(tbl, dev, true);
399 EXPORT_SYMBOL(neigh_carrier_down);
/* Interface down: flush every entry for @dev, permanent included. */
401 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
403 __neigh_ifdown(tbl, dev, false);
406 EXPORT_SYMBOL(neigh_ifdown);
/* Allocate and initialise a new neighbour entry for @tbl/@dev.  Unless
 * @exempt_from_gc, the entry counts against gc_entries and forced GC
 * runs when gc_thresh2/3 are exceeded; allocation fails (NULL, via
 * elided error paths) if the table remains over gc_thresh3.  The
 * initial refcount of 1 belongs to the caller. */
408 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
409 struct net_device *dev,
410 u32 flags, bool exempt_from_gc)
412 struct neighbour *n = NULL;
413 unsigned long now = jiffies;
419 entries = atomic_inc_return(&tbl->gc_entries) - 1;
420 if (entries >= tbl->gc_thresh3 ||
421 (entries >= tbl->gc_thresh2 &&
422 time_after(now, tbl->last_flush + 5 * HZ))) {
423 if (!neigh_forced_gc(tbl) &&
424 entries >= tbl->gc_thresh3) {
425 net_info_ratelimited("%s: neighbor table overflow!\n",
427 NEIGH_CACHE_STAT_INC(tbl, table_fulls);
/* entry_size includes the protocol key; neigh_priv_len is extra
 * device-private room. */
433 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
437 __skb_queue_head_init(&n->arp_queue);
438 rwlock_init(&n->lock);
439 seqlock_init(&n->ha_lock);
440 n->updated = n->used = now;
441 n->nud_state = NUD_NONE;
442 n->output = neigh_blackhole;
444 seqlock_init(&n->hh.hh_lock);
445 n->parms = neigh_parms_clone(&tbl->parms);
446 timer_setup(&n->timer, neigh_timer_handler, 0);
448 NEIGH_CACHE_STAT_INC(tbl, allocs);
450 refcount_set(&n->refcnt, 1);
452 INIT_LIST_HEAD(&n->gc_list);
453 INIT_LIST_HEAD(&n->managed_list);
455 atomic_inc(&tbl->entries);
/* error path: undo the gc_entries reservation taken above */
461 atomic_dec(&tbl->gc_entries);
/* Random per-table hash seed; OR-ing in 1 makes it odd, and in
 * particular never zero. */
465 static void neigh_get_hash_rnd(u32 *x)
467 *x = get_random_u32() | 1;
/* Allocate a hash table with 1 << shift buckets.  Small bucket arrays
 * come from kzalloc; larger ones use whole zeroed pages, registered
 * manually with kmemleak since page allocations are not scanned. */
470 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
472 size_t size = (1 << shift) * sizeof(struct neighbour *);
473 struct neigh_hash_table *ret;
474 struct neighbour __rcu **buckets;
477 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
480 if (size <= PAGE_SIZE) {
481 buckets = kzalloc(size, GFP_ATOMIC);
483 buckets = (struct neighbour __rcu **)
484 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
486 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
492 ret->hash_buckets = buckets;
493 ret->hash_shift = shift;
494 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
495 neigh_get_hash_rnd(&ret->hash_rnd[i]);
/* RCU callback freeing a retired hash table; the free path mirrors
 * the kzalloc-vs-pages split in neigh_hash_alloc(). */
499 static void neigh_hash_free_rcu(struct rcu_head *head)
501 struct neigh_hash_table *nht = container_of(head,
502 struct neigh_hash_table,
504 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
505 struct neighbour __rcu **buckets = nht->hash_buckets;
507 if (size <= PAGE_SIZE) {
510 kmemleak_free(buckets);
511 free_pages((unsigned long)buckets, get_order(size));
/* Rehash every entry into a new table of 1 << new_shift buckets,
 * publish it with rcu_assign_pointer() and schedule the old table for
 * freeing after a grace period.  Caller holds tbl->lock; readers see
 * either the old or the new table, both consistent. */
516 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
517 unsigned long new_shift)
519 unsigned int i, hash;
520 struct neigh_hash_table *new_nht, *old_nht;
522 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
524 old_nht = rcu_dereference_protected(tbl->nht,
525 lockdep_is_held(&tbl->lock));
526 new_nht = neigh_hash_alloc(new_shift);
530 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
531 struct neighbour *n, *next;
533 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
534 lockdep_is_held(&tbl->lock));
537 hash = tbl->hash(n->primary_key, n->dev,
540 hash >>= (32 - new_nht->hash_shift);
541 next = rcu_dereference_protected(n->next,
542 lockdep_is_held(&tbl->lock));
/* push onto the head of the new bucket's chain */
544 rcu_assign_pointer(n->next,
545 rcu_dereference_protected(
546 new_nht->hash_buckets[hash],
547 lockdep_is_held(&tbl->lock)));
548 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
552 rcu_assign_pointer(tbl->nht, new_nht);
553 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
/* Look up the entry for @pkey on @dev and take a reference; entries
 * racing to zero refcount are treated as a miss.  NOTE(review): the
 * matching rcu_read_lock_bh() and the return are in elided lines. */
557 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
558 struct net_device *dev)
562 NEIGH_CACHE_STAT_INC(tbl, lookups);
565 n = __neigh_lookup_noref(tbl, pkey, dev);
567 if (!refcount_inc_not_zero(&n->refcnt))
569 NEIGH_CACHE_STAT_INC(tbl, hits);
572 rcu_read_unlock_bh();
575 EXPORT_SYMBOL(neigh_lookup);
/* Like neigh_lookup() but keyed by netns instead of a device (hash is
 * computed with dev == NULL).  NOTE(review): the rcu_read_lock_bh()
 * side and the return are in elided lines. */
577 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
581 unsigned int key_len = tbl->key_len;
583 struct neigh_hash_table *nht;
585 NEIGH_CACHE_STAT_INC(tbl, lookups);
588 nht = rcu_dereference_bh(tbl->nht);
589 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
591 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
593 n = rcu_dereference_bh(n->next)) {
594 if (!memcmp(n->primary_key, pkey, key_len) &&
595 net_eq(dev_net(n->dev), net)) {
596 if (!refcount_inc_not_zero(&n->refcnt))
598 NEIGH_CACHE_STAT_INC(tbl, hits);
603 rcu_read_unlock_bh();
606 EXPORT_SYMBOL(neigh_lookup_nodev);
/* Core creation path shared by __neigh_create() and friends:
 * allocate, run protocol/device constructors, grow the hash table
 * past its load factor if needed, scan for a concurrent insert of the
 * same key (the n1 loop), then publish the new entry at the bucket
 * head.  Returns the entry (extra reference when @want_ref) or an
 * ERR_PTR; the found-duplicate and release paths are in elided lines.
 */
608 static struct neighbour *
609 ___neigh_create(struct neigh_table *tbl, const void *pkey,
610 struct net_device *dev, u32 flags,
611 bool exempt_from_gc, bool want_ref)
613 u32 hash_val, key_len = tbl->key_len;
614 struct neighbour *n1, *rc, *n;
615 struct neigh_hash_table *nht;
618 n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
619 trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
621 rc = ERR_PTR(-ENOBUFS);
625 memcpy(n->primary_key, pkey, key_len);
627 netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
629 /* Protocol specific setup. */
630 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
632 goto out_neigh_release;
635 if (dev->netdev_ops->ndo_neigh_construct) {
636 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
639 goto out_neigh_release;
643 /* Device specific setup. */
644 if (n->parms->neigh_setup &&
645 (error = n->parms->neigh_setup(n)) < 0) {
647 goto out_neigh_release;
/* back-date confirmed so the new entry is not considered reachable */
650 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
652 write_lock_bh(&tbl->lock);
653 nht = rcu_dereference_protected(tbl->nht,
654 lockdep_is_held(&tbl->lock));
656 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
657 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
659 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
/* parms went away while we were constructing: abort */
661 if (n->parms->dead) {
662 rc = ERR_PTR(-EINVAL);
666 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
667 lockdep_is_held(&tbl->lock));
669 n1 = rcu_dereference_protected(n1->next,
670 lockdep_is_held(&tbl->lock))) {
671 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
681 list_add_tail(&n->gc_list, &n->tbl->gc_list);
682 if (n->flags & NTF_MANAGED)
683 list_add_tail(&n->managed_list, &n->tbl->managed_list);
686 rcu_assign_pointer(n->next,
687 rcu_dereference_protected(nht->hash_buckets[hash_val],
688 lockdep_is_held(&tbl->lock)));
689 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
690 write_unlock_bh(&tbl->lock);
691 neigh_dbg(2, "neigh %p is created\n", n);
696 write_unlock_bh(&tbl->lock);
699 atomic_dec(&tbl->gc_entries);
/* Public creation entry point: no extra flags, GC-eligible entry. */
704 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
705 struct net_device *dev, bool want_ref)
707 return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
709 EXPORT_SYMBOL(__neigh_create);
/* Fold the last four bytes of @pkey down to a bucket index in
 * [0, PNEIGH_HASHMASK]. */
711 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
713 u32 hash_val = *(u32 *)(pkey + key_len - 4);
714 hash_val ^= (hash_val >> 16);
715 hash_val ^= hash_val >> 8;
716 hash_val ^= hash_val >> 4;
717 hash_val &= PNEIGH_HASHMASK;
/* Walk one proxy hash chain starting at @n.  Matches on key and
 * netns; an entry with n->dev == NULL acts as a wildcard matching any
 * device. */
721 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
724 unsigned int key_len,
725 struct net_device *dev)
728 if (!memcmp(n->key, pkey, key_len) &&
729 net_eq(pneigh_net(n), net) &&
730 (n->dev == dev || !n->dev))
/* Lockless-looking proxy lookup helper; the caller is responsible for
 * holding tbl->lock as appropriate. */
737 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
738 struct net *net, const void *pkey, struct net_device *dev)
740 unsigned int key_len = tbl->key_len;
741 u32 hash_val = pneigh_hash(pkey, key_len);
743 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
744 net, pkey, key_len, dev);
746 EXPORT_SYMBOL_GPL(__pneigh_lookup);
/* Look up a proxy entry; when @creat, allocate (GFP_KERNEL — may
 * sleep) and insert one on a miss.  The hit/miss decision between the
 * read-locked lookup and the write-locked insert is in elided lines —
 * NOTE(review): confirm against the full source how a concurrent
 * insert race is handled. */
748 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
749 struct net *net, const void *pkey,
750 struct net_device *dev, int creat)
752 struct pneigh_entry *n;
753 unsigned int key_len = tbl->key_len;
754 u32 hash_val = pneigh_hash(pkey, key_len);
756 read_lock_bh(&tbl->lock);
757 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
758 net, pkey, key_len, dev);
759 read_unlock_bh(&tbl->lock);
766 n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
770 write_pnet(&n->net, net);
771 memcpy(n->key, pkey, key_len);
773 netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
/* protocol constructor failure: unwind the device reference */
775 if (tbl->pconstructor && tbl->pconstructor(n)) {
776 netdev_put(dev, &n->dev_tracker);
782 write_lock_bh(&tbl->lock);
783 n->next = tbl->phash_buckets[hash_val];
784 tbl->phash_buckets[hash_val] = n;
785 write_unlock_bh(&tbl->lock);
789 EXPORT_SYMBOL(pneigh_lookup);
/* Remove the proxy entry matching @pkey/@dev/@net.  Unlinks under
 * tbl->lock, then runs the destructor and drops the device reference
 * outside the lock.  (Return values are in elided lines.) */
792 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
793 struct net_device *dev)
795 struct pneigh_entry *n, **np;
796 unsigned int key_len = tbl->key_len;
797 u32 hash_val = pneigh_hash(pkey, key_len);
799 write_lock_bh(&tbl->lock);
800 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
802 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
803 net_eq(pneigh_net(n), net)) {
805 write_unlock_bh(&tbl->lock);
806 if (tbl->pdestructor)
808 netdev_put(n->dev, &n->dev_tracker);
813 write_unlock_bh(&tbl->lock);
/* Called with tbl->lock write-held (by __neigh_ifdown): collect every
 * proxy entry for @dev (or all, when @dev is NULL) onto a private
 * freelist, drop the lock, then destroy the entries outside it so
 * pdestructor may sleep/take other locks safely. */
817 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
818 struct net_device *dev)
820 struct pneigh_entry *n, **np, *freelist = NULL;
823 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
824 np = &tbl->phash_buckets[h];
825 while ((n = *np) != NULL) {
826 if (!dev || n->dev == dev) {
835 write_unlock_bh(&tbl->lock);
836 while ((n = freelist)) {
839 if (tbl->pdestructor)
841 netdev_put(n->dev, &n->dev_tracker);
847 static void neigh_parms_destroy(struct neigh_parms *parms);
/* Drop a reference on a parms block; destroy on last put. */
849 static inline void neigh_parms_put(struct neigh_parms *parms)
851 if (refcount_dec_and_test(&parms->refcnt))
852 neigh_parms_destroy(parms);
856 * neighbour must already be out of the table;
/* Final destruction once the last reference is gone.  The entry must
 * already be unhashed (see the comment above); a still-"alive" entry
 * only triggers the warning — NOTE(review): the early-return after the
 * warning is in elided lines.  Frees via RCU so lockless readers that
 * still hold a pointer remain safe. */
859 void neigh_destroy(struct neighbour *neigh)
861 struct net_device *dev = neigh->dev;
863 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
866 pr_warn("Destroying alive neighbour %p\n", neigh);
/* a live timer here would mean a reference we are about to free */
871 if (neigh_del_timer(neigh))
872 pr_warn("Impossible event\n");
874 write_lock_bh(&neigh->lock);
875 __skb_queue_purge(&neigh->arp_queue);
876 write_unlock_bh(&neigh->lock);
877 neigh->arp_queue_len_bytes = 0;
879 if (dev->netdev_ops->ndo_neigh_destroy)
880 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
882 netdev_put(dev, &neigh->dev_tracker);
883 neigh_parms_put(neigh->parms);
885 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
887 atomic_dec(&neigh->tbl->entries);
888 kfree_rcu(neigh, rcu);
890 EXPORT_SYMBOL(neigh_destroy);
892 /* Neighbour state is suspicious;
895 Called with write_locked neigh.
/* Entry is suspect: switch to the slow (resolving) output path so
 * traffic revalidates it.  Called with neigh write-locked. */
897 static void neigh_suspect(struct neighbour *neigh)
899 neigh_dbg(2, "neigh %p is suspected\n", neigh);
901 neigh->output = neigh->ops->output;
904 /* Neighbour state is OK;
907 Called with write_locked neigh.
/* Entry is verified: switch to the fast connected output path.
 * Called with neigh write-locked. */
909 static void neigh_connect(struct neighbour *neigh)
911 neigh_dbg(2, "neigh %p is connected\n", neigh);
913 neigh->output = neigh->ops->connected_output;
/* Deferred-work garbage collector.  Every ~300s it re-randomizes each
 * parms' reachable_time; it then sweeps the hash table, unlinking
 * unreferenced entries that are FAILED or idle beyond GC_STALETIME.
 * PERMANENT, in-timer and externally-learned entries are skipped.
 * tbl->lock is dropped and re-taken between buckets (see the inline
 * comment) to keep hold times short.  Reschedules itself at
 * BASE_REACHABLE_TIME/2. */
916 static void neigh_periodic_work(struct work_struct *work)
918 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
920 struct neighbour __rcu **np;
922 struct neigh_hash_table *nht;
924 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
926 write_lock_bh(&tbl->lock);
927 nht = rcu_dereference_protected(tbl->nht,
928 lockdep_is_held(&tbl->lock));
931 * periodically recompute ReachableTime from random function
934 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
935 struct neigh_parms *p;
936 tbl->last_rand = jiffies;
937 list_for_each_entry(p, &tbl->parms_list, list)
939 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* below gc_thresh1 the table is small enough to leave alone */
942 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
945 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
946 np = &nht->hash_buckets[i];
948 while ((n = rcu_dereference_protected(*np,
949 lockdep_is_held(&tbl->lock))) != NULL) {
952 write_lock(&n->lock);
954 state = n->nud_state;
955 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
956 (n->flags & NTF_EXT_LEARNED)) {
957 write_unlock(&n->lock);
/* keep 'used' monotonic with confirmations */
961 if (time_before(n->used, n->confirmed))
962 n->used = n->confirmed;
964 if (refcount_read(&n->refcnt) == 1 &&
965 (state == NUD_FAILED ||
966 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
969 write_unlock(&n->lock);
970 neigh_cleanup_and_release(n);
973 write_unlock(&n->lock);
979 * It's fine to release lock here, even if hash table
980 * grows while we are preempted.
982 write_unlock_bh(&tbl->lock);
984 write_lock_bh(&tbl->lock);
985 nht = rcu_dereference_protected(tbl->nht,
986 lockdep_is_held(&tbl->lock));
989 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
990 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
991 * BASE_REACHABLE_TIME.
993 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
994 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
995 write_unlock_bh(&tbl->lock);
/* Total solicitations before giving up: unicast + application probes,
 * plus multicast (re)probes depending on whether we are in NUD_PROBE.
 */
998 static __inline__ int neigh_max_probes(struct neighbour *n)
1000 struct neigh_parms *p = n->parms;
1001 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
1002 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
1003 NEIGH_VAR(p, MCAST_PROBES));
/* Entry just moved to NUD_FAILED: report unreachability for each
 * queued skb, dropping neigh->lock around the callback since
 * error_report may re-enter this very entry (see ANK's comment), then
 * purge whatever remains. */
1006 static void neigh_invalidate(struct neighbour *neigh)
1007 __releases(neigh->lock)
1008 __acquires(neigh->lock)
1010 struct sk_buff *skb;
1012 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
1013 neigh_dbg(2, "neigh %p is failed\n", neigh);
1014 neigh->updated = jiffies;
1016 /* It is very thin place. report_unreachable is very complicated
1017 routine. Particularly, it can hit the same neighbour entry!
1019 So that, we try to be accurate and avoid dead loop. --ANK
1021 while (neigh->nud_state == NUD_FAILED &&
1022 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1023 write_unlock(&neigh->lock);
1024 neigh->ops->error_report(neigh, skb);
1025 write_lock(&neigh->lock);
1027 __skb_queue_purge(&neigh->arp_queue);
1028 neigh->arp_queue_len_bytes = 0;
/* Send one solicitation for @neigh, releasing neigh->lock before
 * calling into the protocol's solicit op.  The newest queued skb is
 * cloned so it survives arp_queue overflow; its release is in elided
 * lines. */
1031 static void neigh_probe(struct neighbour *neigh)
1032 __releases(neigh->lock)
1034 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1035 /* keep skb alive even if arp_queue overflows */
1037 skb = skb_clone(skb, GFP_ATOMIC);
1038 write_unlock(&neigh->lock);
1039 if (neigh->ops->solicit)
1040 neigh->ops->solicit(neigh, skb);
1041 atomic_inc(&neigh->probes);
1045 /* Called when a timer expires for a neighbour entry. */
/* Per-entry state machine tick, run with neigh->lock write-held:
 * REACHABLE ages into DELAY/STALE, DELAY promotes back to REACHABLE
 * on a recent confirmation or falls into PROBE, and INCOMPLETE/PROBE
 * retransmit until neigh_max_probes() is exceeded, at which point the
 * entry fails and its queue is flushed via neigh_invalidate().  The
 * timer is re-armed with a floor of HZ/100; notification dispatch is
 * partly in elided lines. */
1047 static void neigh_timer_handler(struct timer_list *t)
1049 unsigned long now, next;
1050 struct neighbour *neigh = from_timer(neigh, t, timer);
1054 write_lock(&neigh->lock);
1056 state = neigh->nud_state;
/* timer raced with deletion/state change: nothing to do */
1060 if (!(state & NUD_IN_TIMER))
1063 if (state & NUD_REACHABLE) {
1064 if (time_before_eq(now,
1065 neigh->confirmed + neigh->parms->reachable_time)) {
1066 neigh_dbg(2, "neigh %p is still alive\n", neigh);
1067 next = neigh->confirmed + neigh->parms->reachable_time;
1068 } else if (time_before_eq(now,
1070 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1071 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1072 neigh->nud_state = NUD_DELAY;
1073 neigh->updated = jiffies;
1074 neigh_suspect(neigh);
1075 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1077 neigh_dbg(2, "neigh %p is suspected\n", neigh);
1078 neigh->nud_state = NUD_STALE;
1079 neigh->updated = jiffies;
1080 neigh_suspect(neigh);
1083 } else if (state & NUD_DELAY) {
1084 if (time_before_eq(now,
1086 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1087 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1088 neigh->nud_state = NUD_REACHABLE;
1089 neigh->updated = jiffies;
1090 neigh_connect(neigh);
1092 next = neigh->confirmed + neigh->parms->reachable_time;
1094 neigh_dbg(2, "neigh %p is probed\n", neigh);
1095 neigh->nud_state = NUD_PROBE;
1096 neigh->updated = jiffies;
1097 atomic_set(&neigh->probes, 0);
1099 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1103 /* NUD_PROBE|NUD_INCOMPLETE */
1104 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
1107 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1108 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1109 neigh->nud_state = NUD_FAILED;
1111 neigh_invalidate(neigh);
1115 if (neigh->nud_state & NUD_IN_TIMER) {
1116 if (time_before(next, jiffies + HZ/100))
1117 next = jiffies + HZ/100;
1118 if (!mod_timer(&neigh->timer, next))
1121 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1125 write_unlock(&neigh->lock);
1129 neigh_update_notify(neigh, 0);
1131 trace_neigh_timer_handler(neigh, 0);
1133 neigh_release(neigh);
/* Slow path of neigh_event_send(): make sure resolution is under way
 * for an unresolved entry, and queue @skb bounded by QUEUE_LEN_BYTES
 * (oldest packet dropped on overflow).  Starts INCOMPLETE probing —
 * immediately when @immediate_ok — or fails the entry outright when
 * no mcast/app probes are configured.  NOTE(review): the return
 * values and several early-out/label lines are elided; presumably
 * nonzero means "skb consumed/deferred" — confirm against full
 * source. */
1136 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
1137 const bool immediate_ok)
1140 bool immediate_probe = false;
1142 write_lock_bh(&neigh->lock);
/* already resolved or being verified: caller can transmit */
1145 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1150 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1151 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1152 NEIGH_VAR(neigh->parms, APP_PROBES)) {
1153 unsigned long next, now = jiffies;
1155 atomic_set(&neigh->probes,
1156 NEIGH_VAR(neigh->parms, UCAST_PROBES));
1157 neigh_del_timer(neigh);
1158 neigh->nud_state = NUD_INCOMPLETE;
1159 neigh->updated = now;
1160 if (!immediate_ok) {
1163 immediate_probe = true;
1164 next = now + max(NEIGH_VAR(neigh->parms,
1168 neigh_add_timer(neigh, next);
/* no probe types configured: resolution cannot succeed */
1170 neigh->nud_state = NUD_FAILED;
1171 neigh->updated = jiffies;
1172 write_unlock_bh(&neigh->lock);
1174 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
1177 } else if (neigh->nud_state & NUD_STALE) {
1178 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1179 neigh_del_timer(neigh);
1180 neigh->nud_state = NUD_DELAY;
1181 neigh->updated = jiffies;
1182 neigh_add_timer(neigh, jiffies +
1183 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1186 if (neigh->nud_state == NUD_INCOMPLETE) {
/* enforce the byte budget by evicting the oldest packets */
1188 while (neigh->arp_queue_len_bytes + skb->truesize >
1189 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1190 struct sk_buff *buff;
1192 buff = __skb_dequeue(&neigh->arp_queue);
1195 neigh->arp_queue_len_bytes -= buff->truesize;
1196 kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
1197 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1200 __skb_queue_tail(&neigh->arp_queue, skb);
1201 neigh->arp_queue_len_bytes += skb->truesize;
1206 if (immediate_probe)
1209 write_unlock(&neigh->lock);
1211 trace_neigh_event_send_done(neigh, rc);
/* dead-entry path */
1215 if (neigh->nud_state & NUD_STALE)
1217 write_unlock_bh(&neigh->lock);
1218 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
1219 trace_neigh_event_send_dead(neigh, 1);
1222 EXPORT_SYMBOL(__neigh_event_send);
/* Propagate a changed hardware address into the cached hardware
 * header (hh_cache), under its seqlock, via the device's
 * header_ops->cache_update hook. */
1224 static void neigh_update_hhs(struct neighbour *neigh)
1226 struct hh_cache *hh;
1227 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1230 if (neigh->dev->header_ops)
1231 update = neigh->dev->header_ops->cache_update;
1235 if (READ_ONCE(hh->hh_len)) {
1236 write_seqlock_bh(&hh->hh_lock);
1237 update(hh, neigh->dev, neigh->ha);
1238 write_sequnlock_bh(&hh->hh_lock);
1243 /* Generic update routine.
1244 -- lladdr is new lladdr or NULL, if it is not supplied.
1245 -- new is new state.
1247 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1249 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1250 lladdr instead of overriding it
1252 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1253 NEIGH_UPDATE_F_USE means that the entry is user triggered.
1254 NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
1255 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1257 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1260 Caller MUST hold reference count on the entry.
1262 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1263 u8 new, u32 flags, u32 nlmsg_pid,
1264 struct netlink_ext_ack *extack)
1266 bool gc_update = false, managed_update = false;
1267 int update_isrouter = 0;
1268 struct net_device *dev;
1269 int err, notify = 0;
1272 trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1274 write_lock_bh(&neigh->lock);
1277 old = neigh->nud_state;
1281 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1285 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1286 (old & (NUD_NOARP | NUD_PERMANENT)))
1289 neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update);
1290 if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
1291 new = old & ~NUD_PERMANENT;
1292 neigh->nud_state = new;
1297 if (!(new & NUD_VALID)) {
1298 neigh_del_timer(neigh);
1299 if (old & NUD_CONNECTED)
1300 neigh_suspect(neigh);
1301 neigh->nud_state = new;
1303 notify = old & NUD_VALID;
1304 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1305 (new & NUD_FAILED)) {
1306 neigh_invalidate(neigh);
1312 /* Compare new lladdr with cached one */
1313 if (!dev->addr_len) {
1314 /* First case: device needs no address. */
1316 } else if (lladdr) {
1317 /* The second case: if something is already cached
1318 and a new address is proposed:
1320 - if they are different, check override flag
1322 if ((old & NUD_VALID) &&
1323 !memcmp(lladdr, neigh->ha, dev->addr_len))
1326 /* No address is supplied; if we know something,
/* Continuation of __neigh_update(): apply a validated NUD state transition.
 * The function header and earlier sanity checks are above this chunk, and
 * several brace/return/goto lines of the original are elided between the
 * numbered lines below.
 */
1327 use it, otherwise discard the request.
1330 if (!(old & NUD_VALID)) {
1331 NL_SET_ERR_MSG(extack, "No link layer address given");
/* Refresh confirmation timestamp on any CONNECTED update, even a no-op. */
1337 /* Update confirmed timestamp for neighbour entry after we
1338 * received ARP packet even if it doesn't change IP to MAC binding.
1340 if (new & NUD_CONNECTED)
1341 neigh->confirmed = jiffies;
1343 /* If entry was valid and address is not changed,
1344 do not change entry state, if new one is STALE.
1347 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1348 if (old & NUD_VALID) {
/* Without OVERRIDE a differing lladdr must not replace a valid one;
 * WEAK_OVERRIDE on a CONNECTED entry only lowers trust (branch body elided).
 */
1349 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1350 update_isrouter = 0;
1351 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1352 (old & NUD_CONNECTED)) {
/* Non-admin STALE update with an unchanged address is a no-op (elided). */
1358 if (lladdr == neigh->ha && new == NUD_STALE &&
1359 !(flags & NEIGH_UPDATE_F_ADMIN))
1364 /* Update timestamp only once we know we will make a change to the
1365 * neighbour entry. Otherwise we risk to move the locktime window with
1366 * noop updates and ignore relevant ARP updates.
1368 if (new != old || lladdr != neigh->ha)
1369 neigh->updated = jiffies;
/* Re-arm the per-entry state timer for the new NUD state. */
1372 neigh_del_timer(neigh);
1373 if (new & NUD_PROBE)
1374 atomic_set(&neigh->probes, 0);
1375 if (new & NUD_IN_TIMER)
1376 neigh_add_timer(neigh, (jiffies +
1377 ((new & NUD_REACHABLE) ?
1378 neigh->parms->reachable_time :
1380 neigh->nud_state = new;
/* Publish a changed hardware address under ha_lock so lock-free readers
 * using read_seqbegin()/read_seqretry() see a consistent copy, then
 * refresh the cached hardware headers.
 */
1384 if (lladdr != neigh->ha) {
1385 write_seqlock(&neigh->ha_lock);
1386 memcpy(&neigh->ha, lladdr, dev->addr_len);
1387 write_sequnlock(&neigh->ha_lock);
1388 neigh_update_hhs(neigh);
/* Backdate 'confirmed' so a non-CONNECTED entry is revalidated soon. */
1389 if (!(new & NUD_CONNECTED))
1390 neigh->confirmed = jiffies -
1391 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1396 if (new & NUD_CONNECTED)
1397 neigh_connect(neigh);
1399 neigh_suspect(neigh);
/* Entry just became valid: flush packets queued while resolution ran. */
1400 if (!(old & NUD_VALID)) {
1401 struct sk_buff *skb;
1403 /* Again: avoid dead loop if something went wrong */
1405 while (neigh->nud_state & NUD_VALID &&
1406 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1407 struct dst_entry *dst = skb_dst(skb);
1408 struct neighbour *n2, *n1 = neigh;
/* Drop the lock while transmitting; the output path may take it again. */
1409 write_unlock_bh(&neigh->lock);
1413 /* Why not just use 'neigh' as-is? The problem is that
1414 * things such as shaper, eql, and sch_teql can end up
1415 * using alternative, different, neigh objects to output
1416 * the packet in the output path. So what we need to do
1417 * here is re-lookup the top-level neigh in the path so
1418 * we can reinject the packet there.
1421 if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
1422 n2 = dst_neigh_lookup_skb(dst, skb);
1426 n1->output(n1, skb);
1431 write_lock_bh(&neigh->lock);
/* Anything still queued at this point can never be sent: purge it. */
1433 __skb_queue_purge(&neigh->arp_queue);
1434 neigh->arp_queue_len_bytes = 0;
1438 neigh_update_is_router(neigh, flags, &notify);
/* Epilogue of __neigh_update(): release neigh->lock first, then run the
 * bookkeeping and notifications that must not happen under the lock
 * (gc/managed list maintenance, netevent + netlink notification, trace).
 */
1439 write_unlock_bh(&neigh->lock);
1440 if (((new ^ old) & NUD_PERMANENT) || gc_update)
1441 neigh_update_gc_list(neigh);
1443 neigh_update_managed_list(neigh);
1445 neigh_update_notify(neigh, nlmsg_pid);
1446 trace_neigh_update_done(neigh, err);
1450 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1451 u32 flags, u32 nlmsg_pid)
1453 return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1455 EXPORT_SYMBOL(neigh_update);
1457 /* Update the neigh to listen temporarily for probe responses, even if it is
1458 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
/* NOTE(review): the early return taken for entries not in NUD_FAILED is
 * elided between the numbered lines below.
 */
1460 void __neigh_set_probe_once(struct neighbour *neigh)
1464 neigh->updated = jiffies;
1465 if (!(neigh->nud_state & NUD_FAILED))
1467 neigh->nud_state = NUD_INCOMPLETE;
/* Arm the probe counter at its maximum and restart the retransmit timer;
 * the timer deadline clamps RETRANS_TIME (second max() operand elided).
 */
1468 atomic_set(&neigh->probes, neigh_max_probes(neigh));
1469 neigh_add_timer(neigh,
1470 jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1473 EXPORT_SYMBOL(__neigh_set_probe_once);
/* Handle a received neighbour solicitation / ARP request: look up (and
 * possibly create) the entry for @saddr and mark it NUD_STALE with the
 * advertised @lladdr.
 * NOTE(review): the NULL check guarding neigh_update() and the function's
 * return statement are elided in this view.
 */
1475 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1476 u8 *lladdr, void *saddr,
1477 struct net_device *dev)
1479 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1480 lladdr || !dev->addr_len);
1482 neigh_update(neigh, lladdr, NUD_STALE,
1483 NEIGH_UPDATE_F_OVERRIDE, 0);
1486 EXPORT_SYMBOL(neigh_event_ns);
1488 /* called with read_lock_bh(&n->lock); */
/* Populate the cached hardware header (n->hh) through the device's
 * header_ops->cache() hook, upgrading to the write lock for the duration.
 */
1489 static void neigh_hh_init(struct neighbour *n)
1491 struct net_device *dev = n->dev;
1492 __be16 prot = n->tbl->protocol;
1493 struct hh_cache *hh = &n->hh;
1495 write_lock_bh(&n->lock);
1497 /* Only one thread can come in here and initialize the
/* NOTE(review): the hh_len re-check that makes initialization one-shot is
 * elided before this call.
 */
1501 dev->header_ops->cache(n, hh, prot);
1503 write_unlock_bh(&n->lock);
1506 /* Slow and careful. */
/* Resolving output path: trigger address resolution if required, then build
 * the link-layer header under the ha_lock seqlock and hand the skb to the
 * device queue. (Local declarations, the do-loop opener, error handling and
 * labels are elided in this view.)
 */
1508 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1512 if (!neigh_event_send(neigh, skb)) {
1514 struct net_device *dev = neigh->dev;
/* Lazily fill the hh cache the first time a cacheable header is seen. */
1517 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1518 neigh_hh_init(neigh);
1521 __skb_pull(skb, skb_network_offset(skb));
/* Seqlock retry loop: rebuild the header if neigh->ha changed mid-copy. */
1522 seq = read_seqbegin(&neigh->ha_lock);
1523 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1524 neigh->ha, NULL, skb->len);
1525 } while (read_seqretry(&neigh->ha_lock, seq));
1528 rc = dev_queue_xmit(skb);
1539 EXPORT_SYMBOL(neigh_resolve_output);
1541 /* As fast as possible without hh cache */
/* Connected output path: the entry is trusted, so skip resolution and just
 * build the header under the ha_lock seqlock before transmitting. (Local
 * declarations and the do-loop opener are elided in this view.)
 */
1543 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1545 struct net_device *dev = neigh->dev;
1550 __skb_pull(skb, skb_network_offset(skb));
/* Seqlock retry loop: rebuild the header if neigh->ha changed mid-copy. */
1551 seq = read_seqbegin(&neigh->ha_lock);
1552 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1553 neigh->ha, NULL, skb->len);
1554 } while (read_seqretry(&neigh->ha_lock, seq));
1557 err = dev_queue_xmit(skb);
1564 EXPORT_SYMBOL(neigh_connected_output);
/* Fastest output path: no resolution and no cached-header handling — the
 * skb is handed straight to the device transmit queue. The @neigh argument
 * exists only to satisfy the common output-callback signature.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
/* Periodic worker for NTF_MANAGED entries: probe every neighbour on the
 * table's managed_list and re-arm itself after INTERVAL_PROBE_TIME_MS.
 */
1572 static void neigh_managed_work(struct work_struct *work)
1574 struct neigh_table *tbl = container_of(work, struct neigh_table,
1576 struct neighbour *neigh;
1578 write_lock_bh(&tbl->lock);
1579 list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1580 neigh_event_send_probe(neigh, NULL, false);
/* Re-queue while still holding tbl->lock so list walk and re-arm are one
 * atomic step with respect to table teardown.
 */
1581 queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1582 NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
1583 write_unlock_bh(&tbl->lock);
/* Timer handler for delayed proxy replies: replay every queued skb whose
 * scheduled time has arrived, track the earliest remaining deadline, and
 * re-arm the timer if anything is still pending. (Several brace/assignment
 * lines are elided in this view.)
 */
1586 static void neigh_proxy_process(struct timer_list *t)
1588 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1589 long sched_next = 0;
1590 unsigned long now = jiffies;
1591 struct sk_buff *skb, *n;
1593 spin_lock(&tbl->proxy_queue.lock);
1595 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
/* Negative/zero tdif means this skb's deadline has passed. */
1596 long tdif = NEIGH_CB(skb)->sched_next - now;
1599 struct net_device *dev = skb->dev;
1601 __skb_unlink(skb, &tbl->proxy_queue);
/* Replay the queued request only if the device is still running. */
1602 if (tbl->proxy_redo && netif_running(dev)) {
1604 tbl->proxy_redo(skb);
1611 } else if (!sched_next || tdif < sched_next)
1614 del_timer(&tbl->proxy_timer);
1616 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1617 spin_unlock(&tbl->proxy_queue.lock);
/* Queue @skb for a randomly delayed proxy reply (0..PROXY_DELAY jiffies).
 * NOTE(review): the branch that drops the skb when the queue already
 * exceeds PROXY_QLEN is elided after line 1626 in this view.
 */
1620 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1621 struct sk_buff *skb)
1623 unsigned long sched_next = jiffies +
1624 prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));
1626 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1631 NEIGH_CB(skb)->sched_next = sched_next;
1632 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1634 spin_lock(&tbl->proxy_queue.lock);
/* If the timer was already armed, keep the earlier of the two deadlines. */
1635 if (del_timer(&tbl->proxy_timer)) {
1636 if (time_before(tbl->proxy_timer.expires, sched_next))
1637 sched_next = tbl->proxy_timer.expires;
1641 __skb_queue_tail(&tbl->proxy_queue, skb);
1642 mod_timer(&tbl->proxy_timer, sched_next);
1643 spin_unlock(&tbl->proxy_queue.lock);
1645 EXPORT_SYMBOL(pneigh_enqueue);
/* Find the neigh_parms attached to the device with @ifindex in @net; an
 * ifindex of 0 in init_net selects the table default. The return
 * statements of the original are elided in this view.
 */
1647 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1648 struct net *net, int ifindex)
1650 struct neigh_parms *p;
1652 list_for_each_entry(p, &tbl->parms_list, list) {
1653 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1654 (!p->dev && !ifindex && net_eq(net, &init_net)))
/* Clone the table's default parms for @dev, let the driver adjust them via
 * ndo_neigh_setup(), and link the new parms into the table's list. (NULL
 * checks and error-path returns are elided in this view.)
 */
1661 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1662 struct neigh_table *tbl)
1664 struct neigh_parms *p;
1665 struct net *net = dev_net(dev);
1666 const struct net_device_ops *ops = dev->netdev_ops;
1668 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1671 refcount_set(&p->refcnt, 1);
1673 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* Hold a tracked device reference for the lifetime of the parms. */
1674 netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
1676 write_pnet(&p->net, net);
1677 p->sysctl_table = NULL;
/* Driver veto: drop the device reference if its setup hook fails. */
1679 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1680 netdev_put(dev, &p->dev_tracker);
1685 write_lock_bh(&tbl->lock);
1686 list_add(&p->list, &tbl->parms.list);
1687 write_unlock_bh(&tbl->lock);
1689 neigh_parms_data_state_cleanall(p);
1693 EXPORT_SYMBOL(neigh_parms_alloc);
1695 static void neigh_rcu_free_parms(struct rcu_head *head)
1697 struct neigh_parms *parms =
1698 container_of(head, struct neigh_parms, rcu_head);
1700 neigh_parms_put(parms);
/* Unlink @parms from @tbl and free them after an RCU grace period. The
 * table's built-in default (&tbl->parms) is never released; the early
 * return for that case is elided in this view.
 */
1703 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1705 if (!parms || parms == &tbl->parms)
1707 write_lock_bh(&tbl->lock);
1708 list_del(&parms->list);
1710 write_unlock_bh(&tbl->lock);
1711 netdev_put(parms->dev, &parms->dev_tracker);
/* Defer the final put until concurrent RCU readers are done. */
1712 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1714 EXPORT_SYMBOL(neigh_parms_release);
/* Final destructor for a neigh_parms object (body elided in this view). */
1716 static void neigh_parms_destroy(struct neigh_parms *parms)
/* Lockdep class distinguishing the per-table proxy queue head lock. */
1721 static struct lock_class_key neigh_table_proxy_queue_class;
/* Registered neighbour tables, indexed by NEIGH_*_TABLE constants. */
1723 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
/* One-time initialization of a protocol's neighbour table: set up the
 * default parms, stats, proc entry, hash tables, workers and timers, then
 * publish the table in neigh_tables[@index]. Failures here panic, as the
 * protocol cannot function without its cache. (Some brace/if lines are
 * elided in this view.)
 */
1725 void neigh_table_init(int index, struct neigh_table *tbl)
1727 unsigned long now = jiffies;
1728 unsigned long phsize;
1730 INIT_LIST_HEAD(&tbl->parms_list);
1731 INIT_LIST_HEAD(&tbl->gc_list);
1732 INIT_LIST_HEAD(&tbl->managed_list);
1734 list_add(&tbl->parms.list, &tbl->parms_list);
1735 write_pnet(&tbl->parms.net, &init_net);
1736 refcount_set(&tbl->parms.refcnt, 1);
/* Randomize reachable_time around BASE_REACHABLE_TIME per RFC behaviour. */
1737 tbl->parms.reachable_time =
1738 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1740 tbl->stats = alloc_percpu(struct neigh_statistics);
1742 panic("cannot create neighbour cache statistics");
1744 #ifdef CONFIG_PROC_FS
1745 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1746 &neigh_stat_seq_ops, tbl))
1747 panic("cannot create neighbour proc dir entry");
/* Start with a 2^3-bucket neighbour hash; it grows on demand. */
1750 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1752 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1753 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1755 if (!tbl->nht || !tbl->phash_buckets)
1756 panic("cannot allocate neighbour cache hashes");
/* Protocols may preset entry_size; otherwise derive it from key_len. */
1758 if (!tbl->entry_size)
1759 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1760 tbl->key_len, NEIGH_PRIV_ALIGN);
1762 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1764 rwlock_init(&tbl->lock);
/* Kick off periodic GC and the managed-entry prober. */
1766 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1767 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1768 tbl->parms.reachable_time);
1769 INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1770 queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1772 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1773 skb_queue_head_init_class(&tbl->proxy_queue,
1774 &neigh_table_proxy_queue_class);
1776 tbl->last_flush = now;
1777 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1779 neigh_tables[index] = tbl;
1781 EXPORT_SYMBOL(neigh_table_init);
/* Tear down a neighbour table on protocol unload: unpublish it, stop all
 * workers/timers, flush queues and entries, then free the hashes, proc
 * entry and stats. (Return statement elided in this view.)
 */
1783 int neigh_table_clear(int index, struct neigh_table *tbl)
1785 neigh_tables[index] = NULL;
1786 /* It is not clean... Fix it to unload IPv6 module safely */
1787 cancel_delayed_work_sync(&tbl->managed_work);
1788 cancel_delayed_work_sync(&tbl->gc_work);
1789 del_timer_sync(&tbl->proxy_timer);
1790 pneigh_queue_purge(&tbl->proxy_queue);
1791 neigh_ifdown(tbl, NULL);
/* Any remaining entries indicate a refcount leak somewhere. */
1792 if (atomic_read(&tbl->entries))
1793 pr_crit("neighbour leakage\n");
1795 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1796 neigh_hash_free_rcu);
1799 kfree(tbl->phash_buckets);
1800 tbl->phash_buckets = NULL;
1802 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1804 free_percpu(tbl->stats);
1809 EXPORT_SYMBOL(neigh_table_clear);
/* Map an address family to its registered neighbour table, or NULL if the
 * family has none. (The switch/case scaffolding and return are elided; the
 * three assignments correspond to AF_INET, AF_INET6 and AF_DECnet.)
 */
1811 static struct neigh_table *neigh_find_table(int family)
1813 struct neigh_table *tbl = NULL;
1817 tbl = neigh_tables[NEIGH_ARP_TABLE];
1820 tbl = neigh_tables[NEIGH_ND_TABLE];
1823 tbl = neigh_tables[NEIGH_DN_TABLE];
/* Netlink attribute policy for RTM_NEWNEIGH/RTM_DELNEIGH/RTM_GETNEIGH.
 * Attributes from NDA_NH_ID onward are strictly validated.
 */
1830 const struct nla_policy nda_policy[NDA_MAX+1] = {
1831 [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID },
1832 [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1833 [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1834 [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
1835 [NDA_PROBES] = { .type = NLA_U32 },
1836 [NDA_VLAN] = { .type = NLA_U16 },
1837 [NDA_PORT] = { .type = NLA_U16 },
1838 [NDA_VNI] = { .type = NLA_U32 },
1839 [NDA_IFINDEX] = { .type = NLA_U32 },
1840 [NDA_MASTER] = { .type = NLA_U32 },
1841 [NDA_PROTOCOL] = { .type = NLA_U8 },
1842 [NDA_NH_ID] = { .type = NLA_U32 },
1843 [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
1844 [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
/* RTM_DELNEIGH handler: validate the request, resolve device and table,
 * then either delete a proxy entry or force the real entry to NUD_FAILED
 * and remove it. (Error returns and some braces are elided in this view.)
 */
1847 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1848 struct netlink_ext_ack *extack)
1850 struct net *net = sock_net(skb->sk);
1852 struct nlattr *dst_attr;
1853 struct neigh_table *tbl;
1854 struct neighbour *neigh;
1855 struct net_device *dev = NULL;
1859 if (nlmsg_len(nlh) < sizeof(*ndm))
1862 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1864 NL_SET_ERR_MSG(extack, "Network address not specified");
1868 ndm = nlmsg_data(nlh);
1869 if (ndm->ndm_ifindex) {
1870 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1877 tbl = neigh_find_table(ndm->ndm_family);
1879 return -EAFNOSUPPORT;
1881 if (nla_len(dst_attr) < (int)tbl->key_len) {
1882 NL_SET_ERR_MSG(extack, "Invalid network address");
/* Proxy entries live in a separate table and have their own delete path. */
1886 if (ndm->ndm_flags & NTF_PROXY) {
1887 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1894 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1895 if (neigh == NULL) {
/* Admin override: fail the entry, then unlink it under the table lock. */
1900 err = __neigh_update(neigh, NULL, NUD_FAILED,
1901 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1902 NETLINK_CB(skb).portid, extack);
1903 write_lock_bh(&tbl->lock);
1904 neigh_release(neigh);
1905 neigh_remove_one(neigh, tbl);
1906 write_unlock_bh(&tbl->lock);
/* RTM_NEWNEIGH handler: parse attributes, resolve device and table, then
 * create/update either a proxy entry or a regular neighbour via
 * __neigh_update(). (Error returns, gotos and some braces are elided in
 * this view.)
 */
1912 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1913 struct netlink_ext_ack *extack)
1915 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1916 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1917 struct net *net = sock_net(skb->sk);
1919 struct nlattr *tb[NDA_MAX+1];
1920 struct neigh_table *tbl;
1921 struct net_device *dev = NULL;
1922 struct neighbour *neigh;
1929 err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
1930 nda_policy, extack);
1936 NL_SET_ERR_MSG(extack, "Network address not specified");
1940 ndm = nlmsg_data(nlh);
/* Merge NDA_FLAGS_EXT into the flag word above the legacy 8-bit flags. */
1941 ndm_flags = ndm->ndm_flags;
1942 if (tb[NDA_FLAGS_EXT]) {
1943 u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
1945 BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
1946 (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
1947 hweight32(NTF_EXT_MASK)));
1948 ndm_flags |= (ext << NTF_EXT_SHIFT);
1950 if (ndm->ndm_ifindex) {
1951 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1957 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1958 NL_SET_ERR_MSG(extack, "Invalid link address");
1963 tbl = neigh_find_table(ndm->ndm_family);
1965 return -EAFNOSUPPORT;
1967 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
1968 NL_SET_ERR_MSG(extack, "Invalid network address");
1972 dst = nla_data(tb[NDA_DST]);
1973 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1975 if (tb[NDA_PROTOCOL])
1976 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
/* Proxy entries: NTF_MANAGED is meaningless for them, reject the combo. */
1977 if (ndm_flags & NTF_PROXY) {
1978 struct pneigh_entry *pn;
1980 if (ndm_flags & NTF_MANAGED) {
1981 NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
1986 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1988 pn->flags = ndm_flags;
1990 pn->protocol = protocol;
1997 NL_SET_ERR_MSG(extack, "Device not specified");
/* Give the protocol a chance to veto entries on unsuitable devices. */
2001 if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
2006 neigh = neigh_lookup(tbl, dst, dev);
2007 if (neigh == NULL) {
/* PERMANENT or externally-learned entries are exempt from GC pressure. */
2008 bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
2009 bool exempt_from_gc = ndm_permanent ||
2010 ndm_flags & NTF_EXT_LEARNED;
2012 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
2016 if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
2017 NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
2022 neigh = ___neigh_create(tbl, dst, dev,
2024 (NTF_EXT_LEARNED | NTF_MANAGED),
2025 exempt_from_gc, true);
2026 if (IS_ERR(neigh)) {
2027 err = PTR_ERR(neigh);
2031 if (nlh->nlmsg_flags & NLM_F_EXCL) {
2033 neigh_release(neigh);
/* Without NLM_F_REPLACE an existing entry must not be overridden. */
2037 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
2038 flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
2039 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
2043 neigh->protocol = protocol;
2044 if (ndm_flags & NTF_EXT_LEARNED)
2045 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
2046 if (ndm_flags & NTF_ROUTER)
2047 flags |= NEIGH_UPDATE_F_ISROUTER;
2048 if (ndm_flags & NTF_MANAGED)
2049 flags |= NEIGH_UPDATE_F_MANAGED;
2050 if (ndm_flags & NTF_USE)
2051 flags |= NEIGH_UPDATE_F_USE;
2053 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
2054 NETLINK_CB(skb).portid, extack);
/* NTF_USE/NTF_MANAGED additionally kick resolution immediately. */
2055 if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
2056 neigh_event_send(neigh, NULL);
2059 neigh_release(neigh);
/* Emit one NDTA_PARMS nest describing @parms. Returns the nest end offset
 * on success or a negative error after cancelling the nest. (NULL check on
 * the nest and the error return are elided in this view.)
 */
2064 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2066 struct nlattr *nest;
2068 nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2073 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2074 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2075 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2076 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2077 /* approximative value for deprecated QUEUE_LEN (in packets) */
2078 nla_put_u32(skb, NDTPA_QUEUE_LEN,
2079 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2080 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2081 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2082 nla_put_u32(skb, NDTPA_UCAST_PROBES,
2083 NEIGH_VAR(parms, UCAST_PROBES)) ||
2084 nla_put_u32(skb, NDTPA_MCAST_PROBES,
2085 NEIGH_VAR(parms, MCAST_PROBES)) ||
2086 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2087 NEIGH_VAR(parms, MCAST_REPROBES)) ||
2088 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2090 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2091 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2092 nla_put_msecs(skb, NDTPA_GC_STALETIME,
2093 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2094 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2095 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2096 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2097 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2098 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2099 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2100 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2101 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2102 nla_put_msecs(skb, NDTPA_LOCKTIME,
2103 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2104 nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2105 NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2106 goto nla_put_failure;
2107 return nla_nest_end(skb, nest);
2110 nla_nest_cancel(skb, nest);
/* Fill one RTM_NEWNEIGHTBL message for @tbl: thresholds, config snapshot,
 * aggregated per-CPU stats, and the table-default parms nest. The table
 * read lock is held across the whole fill so values are consistent.
 * (Some braces and return statements are elided in this view.)
 */
2114 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2115 u32 pid, u32 seq, int type, int flags)
2117 struct nlmsghdr *nlh;
2118 struct ndtmsg *ndtmsg;
2120 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2124 ndtmsg = nlmsg_data(nlh);
2126 read_lock_bh(&tbl->lock);
2127 ndtmsg->ndtm_family = tbl->family;
2128 ndtmsg->ndtm_pad1 = 0;
2129 ndtmsg->ndtm_pad2 = 0;
2131 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2132 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2133 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2134 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2135 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2136 goto nla_put_failure;
/* NDTA_CONFIG: snapshot of table geometry and activity counters. */
2138 unsigned long now = jiffies;
2139 long flush_delta = now - tbl->last_flush;
2140 long rand_delta = now - tbl->last_rand;
2141 struct neigh_hash_table *nht;
2142 struct ndt_config ndc = {
2143 .ndtc_key_len = tbl->key_len,
2144 .ndtc_entry_size = tbl->entry_size,
2145 .ndtc_entries = atomic_read(&tbl->entries),
2146 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
2147 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
2148 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
/* Hash parameters are read under RCU since the table can be resized. */
2152 nht = rcu_dereference_bh(tbl->nht);
2153 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2154 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2155 rcu_read_unlock_bh();
2157 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2158 goto nla_put_failure;
/* NDTA_STATS: sum the per-CPU statistics into one struct. */
2163 struct ndt_stats ndst;
2165 memset(&ndst, 0, sizeof(ndst));
2167 for_each_possible_cpu(cpu) {
2168 struct neigh_statistics *st;
2170 st = per_cpu_ptr(tbl->stats, cpu);
2171 ndst.ndts_allocs += st->allocs;
2172 ndst.ndts_destroys += st->destroys;
2173 ndst.ndts_hash_grows += st->hash_grows;
2174 ndst.ndts_res_failed += st->res_failed;
2175 ndst.ndts_lookups += st->lookups;
2176 ndst.ndts_hits += st->hits;
2177 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
2178 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
2179 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
2180 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
2181 ndst.ndts_table_fulls += st->table_fulls;
2184 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2186 goto nla_put_failure;
/* The table default parms must never be bound to a device. */
2189 BUG_ON(tbl->parms.dev);
2190 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2191 goto nla_put_failure;
2193 read_unlock_bh(&tbl->lock);
2194 nlmsg_end(skb, nlh);
2198 read_unlock_bh(&tbl->lock);
2199 nlmsg_cancel(skb, nlh);
/* Fill an RTM_NEWNEIGHTBL message that carries only the table name and a
 * single per-device parms nest (used when dumping non-default parms).
 * (Return statements and the failure label are elided in this view.)
 */
2203 static int neightbl_fill_param_info(struct sk_buff *skb,
2204 struct neigh_table *tbl,
2205 struct neigh_parms *parms,
2206 u32 pid, u32 seq, int type,
2209 struct ndtmsg *ndtmsg;
2210 struct nlmsghdr *nlh;
2212 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2216 ndtmsg = nlmsg_data(nlh);
2218 read_lock_bh(&tbl->lock);
2219 ndtmsg->ndtm_family = tbl->family;
2220 ndtmsg->ndtm_pad1 = 0;
2221 ndtmsg->ndtm_pad2 = 0;
2223 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2224 neightbl_fill_parms(skb, parms) < 0)
2227 read_unlock_bh(&tbl->lock);
2228 nlmsg_end(skb, nlh);
2231 read_unlock_bh(&tbl->lock);
2232 nlmsg_cancel(skb, nlh);
/* Attribute policy for RTM_SETNEIGHTBL (table-level settings). */
2236 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2237 [NDTA_NAME] = { .type = NLA_STRING },
2238 [NDTA_THRESH1] = { .type = NLA_U32 },
2239 [NDTA_THRESH2] = { .type = NLA_U32 },
2240 [NDTA_THRESH3] = { .type = NLA_U32 },
2241 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2242 [NDTA_PARMS] = { .type = NLA_NESTED },
/* Attribute policy for the nested NDTA_PARMS settings. */
2245 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2246 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2247 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2248 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2249 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2250 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2251 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2252 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2253 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2254 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2255 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2256 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2257 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2258 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2259 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
/* A zero probe interval would spin the managed worker; enforce >= 1ms. */
2260 [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 },
/* RTM_SETNEIGHTBL handler: locate the named table, then update per-device
 * parms from the NDTA_PARMS nest and/or the table-wide GC thresholds.
 * (Switch/case scaffolding, error returns and labels are elided in this
 * view.)
 */
2263 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2264 struct netlink_ext_ack *extack)
2266 struct net *net = sock_net(skb->sk);
2267 struct neigh_table *tbl;
2268 struct ndtmsg *ndtmsg;
2269 struct nlattr *tb[NDTA_MAX+1];
2273 err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2274 nl_neightbl_policy, extack);
2278 if (tb[NDTA_NAME] == NULL) {
2283 ndtmsg = nlmsg_data(nlh);
/* Select the table by family filter plus exact name match. */
2285 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2286 tbl = neigh_tables[tidx];
2289 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2291 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2301 * We acquire tbl->lock to be nice to the periodic timers and
2302 * make sure they always see a consistent set of values.
2304 write_lock_bh(&tbl->lock);
2306 if (tb[NDTA_PARMS]) {
2307 struct nlattr *tbp[NDTPA_MAX+1];
2308 struct neigh_parms *p;
2311 err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2313 nl_ntbl_parm_policy, extack);
2315 goto errout_tbl_lock;
2317 if (tbp[NDTPA_IFINDEX])
2318 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2320 p = lookup_neigh_parms(tbl, net, ifindex);
2323 goto errout_tbl_lock;
/* Apply every supplied NDTPA_* attribute to the selected parms. */
2326 for (i = 1; i <= NDTPA_MAX; i++) {
2331 case NDTPA_QUEUE_LEN:
2332 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2333 nla_get_u32(tbp[i]) *
2334 SKB_TRUESIZE(ETH_FRAME_LEN));
2336 case NDTPA_QUEUE_LENBYTES:
2337 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2338 nla_get_u32(tbp[i]));
2340 case NDTPA_PROXY_QLEN:
2341 NEIGH_VAR_SET(p, PROXY_QLEN,
2342 nla_get_u32(tbp[i]));
2344 case NDTPA_APP_PROBES:
2345 NEIGH_VAR_SET(p, APP_PROBES,
2346 nla_get_u32(tbp[i]));
2348 case NDTPA_UCAST_PROBES:
2349 NEIGH_VAR_SET(p, UCAST_PROBES,
2350 nla_get_u32(tbp[i]));
2352 case NDTPA_MCAST_PROBES:
2353 NEIGH_VAR_SET(p, MCAST_PROBES,
2354 nla_get_u32(tbp[i]));
2356 case NDTPA_MCAST_REPROBES:
2357 NEIGH_VAR_SET(p, MCAST_REPROBES,
2358 nla_get_u32(tbp[i]));
2360 case NDTPA_BASE_REACHABLE_TIME:
2361 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2362 nla_get_msecs(tbp[i]));
2363 /* update reachable_time as well, otherwise, the change will
2364 * only be effective after the next time neigh_periodic_work
2365 * decides to recompute it (can be multiple minutes)
2368 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2370 case NDTPA_GC_STALETIME:
2371 NEIGH_VAR_SET(p, GC_STALETIME,
2372 nla_get_msecs(tbp[i]));
2374 case NDTPA_DELAY_PROBE_TIME:
2375 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2376 nla_get_msecs(tbp[i]));
/* Interested subsystems (e.g. switch offload) track this value. */
2377 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2379 case NDTPA_INTERVAL_PROBE_TIME_MS:
2380 NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
2381 nla_get_msecs(tbp[i]));
2383 case NDTPA_RETRANS_TIME:
2384 NEIGH_VAR_SET(p, RETRANS_TIME,
2385 nla_get_msecs(tbp[i]));
2387 case NDTPA_ANYCAST_DELAY:
2388 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2389 nla_get_msecs(tbp[i]));
2391 case NDTPA_PROXY_DELAY:
2392 NEIGH_VAR_SET(p, PROXY_DELAY,
2393 nla_get_msecs(tbp[i]));
2395 case NDTPA_LOCKTIME:
2396 NEIGH_VAR_SET(p, LOCKTIME,
2397 nla_get_msecs(tbp[i]));
/* Table-wide GC knobs may only be changed from the initial netns. */
2404 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2405 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2406 !net_eq(net, &init_net))
2407 goto errout_tbl_lock;
2409 if (tb[NDTA_THRESH1])
2410 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2412 if (tb[NDTA_THRESH2])
2413 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2415 if (tb[NDTA_THRESH3])
2416 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2418 if (tb[NDTA_GC_INTERVAL])
2419 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2424 write_unlock_bh(&tbl->lock);
/* Strict-mode validation of an RTM_GETNEIGHTBL dump request header: the
 * header must be a full ndtmsg with zero padding and no trailing
 * attributes. (Error returns are elided in this view.)
 */
2429 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2430 struct netlink_ext_ack *extack)
2432 struct ndtmsg *ndtm;
2434 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2435 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2439 ndtm = nlmsg_data(nlh);
2440 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2441 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2445 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2446 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
/* RTM_GETNEIGHTBL dump: for each table matching the family filter, emit
 * the table info followed by every non-default per-device parms entry,
 * resuming from cb->args[0]/args[1]. (Loop tails, break conditions and
 * the return are elided in this view.)
 */
2453 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2455 const struct nlmsghdr *nlh = cb->nlh;
2456 struct net *net = sock_net(skb->sk);
2457 int family, tidx, nidx = 0;
2458 int tbl_skip = cb->args[0];
2459 int neigh_skip = cb->args[1];
2460 struct neigh_table *tbl;
2462 if (cb->strict_check) {
2463 int err = neightbl_valid_dump_info(nlh, cb->extack);
2469 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2471 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2472 struct neigh_parms *p;
2474 tbl = neigh_tables[tidx];
2478 if (tidx < tbl_skip || (family && tbl->family != family))
2481 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2482 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
/* Skip the default parms (first list entry) and walk the rest. */
2487 p = list_next_entry(&tbl->parms, list);
2488 list_for_each_entry_from(p, &tbl->parms_list, list) {
2489 if (!net_eq(neigh_parms_net(p), net))
2492 if (nidx < neigh_skip)
2495 if (neightbl_fill_param_info(skb, tbl, p,
2496 NETLINK_CB(cb->skb).portid,
/* Fill one RTM_NEWNEIGH message for a regular neighbour entry: ndmsg
 * header, destination key, (if valid) a consistent lladdr snapshot, cache
 * timing info, probe count and extended flags. (Error returns are elided
 * in this view.)
 */
2514 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2515 u32 pid, u32 seq, int type, unsigned int flags)
2517 u32 neigh_flags, neigh_flags_ext;
2518 unsigned long now = jiffies;
2519 struct nda_cacheinfo ci;
2520 struct nlmsghdr *nlh;
2523 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
/* Split internal flags into the legacy 8-bit field and NDA_FLAGS_EXT. */
2527 neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2528 neigh_flags = neigh->flags & NTF_OLD_MASK;
2530 ndm = nlmsg_data(nlh);
2531 ndm->ndm_family = neigh->ops->family;
2534 ndm->ndm_flags = neigh_flags;
2535 ndm->ndm_type = neigh->type;
2536 ndm->ndm_ifindex = neigh->dev->ifindex;
2538 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2539 goto nla_put_failure;
/* State, address and cacheinfo are sampled under the entry lock. */
2541 read_lock_bh(&neigh->lock);
2542 ndm->ndm_state = neigh->nud_state;
2543 if (neigh->nud_state & NUD_VALID) {
2544 char haddr[MAX_ADDR_LEN];
2546 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2547 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2548 read_unlock_bh(&neigh->lock);
2549 goto nla_put_failure;
2553 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2554 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2555 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
/* Don't report the reference the dumper itself holds. */
2556 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2557 read_unlock_bh(&neigh->lock);
2559 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2560 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2561 goto nla_put_failure;
2563 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2564 goto nla_put_failure;
2565 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2566 goto nla_put_failure;
2568 nlmsg_end(skb, nlh);
2572 nlmsg_cancel(skb, nlh);
/* Fill one RTM_NEWNEIGH message for a proxy entry: always reported with
 * NTF_PROXY set, NUD_NONE state and RTN_UNICAST type. (Error returns are
 * elided in this view.)
 */
2576 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2577 u32 pid, u32 seq, int type, unsigned int flags,
2578 struct neigh_table *tbl)
2580 u32 neigh_flags, neigh_flags_ext;
2581 struct nlmsghdr *nlh;
2584 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
/* Split internal flags into the legacy 8-bit field and NDA_FLAGS_EXT. */
2588 neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
2589 neigh_flags = pn->flags & NTF_OLD_MASK;
2591 ndm = nlmsg_data(nlh);
2592 ndm->ndm_family = tbl->family;
2595 ndm->ndm_flags = neigh_flags | NTF_PROXY;
2596 ndm->ndm_type = RTN_UNICAST;
2597 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2598 ndm->ndm_state = NUD_NONE;
2600 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2601 goto nla_put_failure;
2603 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2604 goto nla_put_failure;
2605 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2606 goto nla_put_failure;
2608 nlmsg_end(skb, nlh);
2612 nlmsg_cancel(skb, nlh);
2616 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2618 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2619 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
/* Dump filter: return whether @dev should be skipped given an NDA_MASTER
 * ifindex filter. (Early return for master_idx == 0 and the final return
 * statements are elided in this view.)
 */
2622 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2624 struct net_device *master;
2629 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2631 /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
2632 * invalid value for ifindex to denote "no master".
2634 if (master_idx == -1)
2637 if (!master || master->ifindex != master_idx)
/* Dump filter: skip entries whose device does not match a non-zero
 * NDA_IFINDEX filter. (Return statements are elided in this view.)
 */
2643 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2645 if (filter_idx && (!dev || dev->ifindex != filter_idx))
/* Filter parsed from an RTM_GETNEIGH dump request (fields elided here;
 * dev_idx and master_idx are referenced by the dump functions below).
 */
2651 struct neigh_dump_filter {
/* Dump all regular neighbours of @tbl that match @filter, walking the RCU
 * hash table and resuming from cb->args[1]/args[2]. (Loop tails, the
 * early-exit path on a full skb and the return are elided in this view.)
 */
2656 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2657 struct netlink_callback *cb,
2658 struct neigh_dump_filter *filter)
2660 struct net *net = sock_net(skb->sk);
2661 struct neighbour *n;
2662 int rc, h, s_h = cb->args[1];
2663 int idx, s_idx = idx = cb->args[2];
2664 struct neigh_hash_table *nht;
2665 unsigned int flags = NLM_F_MULTI;
2667 if (filter->dev_idx || filter->master_idx)
2668 flags |= NLM_F_DUMP_FILTERED;
2671 nht = rcu_dereference_bh(tbl->nht);
2673 for (h = s_h; h < (1 << nht->hash_shift); h++) {
/* Lock-free bucket walk under rcu_read_lock_bh(). */
2676 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2678 n = rcu_dereference_bh(n->next)) {
2679 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2681 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2682 neigh_master_filtered(n->dev, filter->master_idx))
2684 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2697 rcu_read_unlock_bh();
/* Dump all proxy entries of @tbl that match @filter, walking the pneigh
 * hash under the table read lock and resuming from cb->args[3]/args[4].
 * (Loop tails and the return are elided in this view.)
 */
2703 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2704 struct netlink_callback *cb,
2705 struct neigh_dump_filter *filter)
2707 struct pneigh_entry *n;
2708 struct net *net = sock_net(skb->sk);
2709 int rc, h, s_h = cb->args[3];
2710 int idx, s_idx = idx = cb->args[4];
2711 unsigned int flags = NLM_F_MULTI;
2713 if (filter->dev_idx || filter->master_idx)
2714 flags |= NLM_F_DUMP_FILTERED;
2716 read_lock_bh(&tbl->lock);
2718 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2721 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2722 if (idx < s_idx || pneigh_net(n) != net)
2724 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2725 neigh_master_filtered(n->dev, filter->master_idx))
/* On a full skb, drop the lock before bailing out. */
2727 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2729 RTM_NEWNEIGH, flags, tbl) < 0) {
2730 read_unlock_bh(&tbl->lock);
2739 read_unlock_bh(&tbl->lock);
/* Validate an RTM_GETNEIGH dump request and extract the device/master
 * filter. Under strict checking the header and attributes are fully
 * validated; legacy requests get a permissive parse. (Error returns and
 * some braces are elided in this view.)
 */
2748 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2750 struct neigh_dump_filter *filter,
2751 struct netlink_ext_ack *extack)
2753 struct nlattr *tb[NDA_MAX + 1];
2759 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2760 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2764 ndm = nlmsg_data(nlh);
2765 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2766 ndm->ndm_state || ndm->ndm_type) {
2767 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2771 if (ndm->ndm_flags & ~NTF_PROXY) {
2772 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2776 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2777 tb, NDA_MAX, nda_policy,
2780 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2781 NDA_MAX, nda_policy, extack);
/* Only NDA_IFINDEX and NDA_MASTER are meaningful filters here. */
2786 for (i = 0; i <= NDA_MAX; ++i) {
2790 /* all new attributes should require strict_check */
2793 filter->dev_idx = nla_get_u32(tb[i]);
2796 filter->master_idx = nla_get_u32(tb[i]);
2800 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
/*
 * neigh_dump_info - RTM_GETNEIGH dump entry point.  Walks every neighbour
 * table, dispatching to pneigh_dump_table() when the request asked for
 * NTF_PROXY entries and neigh_dump_table() otherwise.  cb->args[0] holds
 * the table index for dump resumption; the remaining args belong to the
 * per-table dumpers and are reset when moving to a new table.
 * NOTE(review): several original lines are elided in this view.
 */
2809 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2811 const struct nlmsghdr *nlh = cb->nlh;
2812 struct neigh_dump_filter filter = {};
2813 struct neigh_table *tbl;
2818 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2820 /* check for full ndmsg structure presence, family member is
2821 * the same for both structures
2823 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2824 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
/* Validation failures are fatal only under strict checking. */
2827 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2828 if (err < 0 && cb->strict_check)
2833 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2834 tbl = neigh_tables[t];
/* Skip tables already dumped or not matching the requested family. */
2838 if (t < s_t || (family && tbl->family != family))
/* Entering a new table: clear the per-table resume state (args[1..]). */
2841 memset(&cb->args[1], 0, sizeof(cb->args) -
2842 sizeof(cb->args[0]));
2844 err = pneigh_dump_table(tbl, skb, cb, &filter);
2846 err = neigh_dump_table(tbl, skb, cb, &filter);
/*
 * neigh_valid_get_req - validate a non-dump RTM_GETNEIGH request and pull
 * out the pieces the handler needs: the neighbour table (from ndm_family),
 * the destination address (NDA_DST, length-checked against the table's
 * key_len), the ifindex and the ndm_flags.
 * NOTE(review): some original lines (returns, attribute switch) are elided.
 */
2855 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2856 struct neigh_table **tbl,
2857 void **dst, int *dev_idx, u8 *ndm_flags,
2858 struct netlink_ext_ack *extack)
2860 struct nlattr *tb[NDA_MAX + 1];
/* Header must be at least a struct ndmsg. */
2864 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2865 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2869 ndm = nlmsg_data(nlh);
/* Reserved/pad and state fields must be zero in a get request. */
2870 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2872 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
/* Only NTF_PROXY is a valid flag here. */
2876 if (ndm->ndm_flags & ~NTF_PROXY) {
2877 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2881 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2882 NDA_MAX, nda_policy, extack);
2886 *ndm_flags = ndm->ndm_flags;
2887 *dev_idx = ndm->ndm_ifindex;
2888 *tbl = neigh_find_table(ndm->ndm_family);
2890 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2891 return -EAFNOSUPPORT;
2894 for (i = 0; i <= NDA_MAX; ++i) {
/* NDA_DST must exactly match the table's key length. */
2900 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2901 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2904 *dst = nla_data(tb[i]);
2907 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
/*
 * neigh_nlmsg_size - worst-case payload size of a neighbour netlink
 * message, used to size skbs for notifications and get replies.  Must
 * account for every attribute neigh_fill_info() may emit; -EMSGSIZE from
 * the fill path indicates this estimate is wrong.
 */
2915 static inline size_t neigh_nlmsg_size(void)
2917 return NLMSG_ALIGN(sizeof(struct ndmsg))
2918 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2919 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2920 + nla_total_size(sizeof(struct nda_cacheinfo))
2921 + nla_total_size(4) /* NDA_PROBES */
2922 + nla_total_size(4) /* NDA_FLAGS_EXT */
2923 + nla_total_size(1); /* NDA_PROTOCOL */
/*
 * neigh_get_reply - build an RTM_NEWNEIGH message for @neigh and unicast
 * it back to the requesting socket (@pid).  Allocates a fresh skb sized
 * by neigh_nlmsg_size().  (Error-path lines are elided in this view.)
 */
2926 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2929 struct sk_buff *skb;
2932 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2936 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2942 err = rtnl_unicast(skb, net, pid);
/*
 * pneigh_nlmsg_size - worst-case payload size of a proxy-neighbour netlink
 * message (pneigh entries carry fewer attributes than full neighbours).
 */
2947 static inline size_t pneigh_nlmsg_size(void)
2949 return NLMSG_ALIGN(sizeof(struct ndmsg))
2950 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2951 + nla_total_size(4) /* NDA_FLAGS_EXT */
2952 + nla_total_size(1); /* NDA_PROTOCOL */
/*
 * pneigh_get_reply - build an RTM_NEWNEIGH message for a proxy entry and
 * unicast it to the requesting socket.  Mirrors neigh_get_reply() but uses
 * pneigh_fill_info()/pneigh_nlmsg_size().  (Error paths elided here.)
 */
2955 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2956 u32 pid, u32 seq, struct neigh_table *tbl)
2958 struct sk_buff *skb;
2961 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2965 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2971 err = rtnl_unicast(skb, net, pid);
/*
 * neigh_get - non-dump RTM_GETNEIGH handler: look up a single neighbour
 * (or, with NTF_PROXY, a proxy entry) by table/address/device and reply
 * with one RTM_NEWNEIGH message.
 * NOTE(review): several original lines (error returns, variable decls)
 * are elided in this view.
 */
2976 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2977 struct netlink_ext_ack *extack)
2979 struct net *net = sock_net(in_skb->sk);
2980 struct net_device *dev = NULL;
2981 struct neigh_table *tbl = NULL;
2982 struct neighbour *neigh;
2988 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2994 dev = __dev_get_by_index(net, dev_idx);
2996 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3002 NL_SET_ERR_MSG(extack, "Network address not specified");
/* Proxy entries live in a separate table and may be device-less. */
3006 if (ndm_flags & NTF_PROXY) {
3007 struct pneigh_entry *pn;
3009 pn = pneigh_lookup(tbl, net, dst, dev, 0);
3011 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3014 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
3015 nlh->nlmsg_seq, tbl);
/* Regular neighbour lookups require a device. */
3019 NL_SET_ERR_MSG(extack, "No device specified");
/* neigh_lookup() takes a reference; released after the reply is sent. */
3023 neigh = neigh_lookup(tbl, dst, dev);
3025 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3029 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
3032 neigh_release(neigh);
/*
 * neigh_for_each - invoke @cb(@n, @cookie) for every neighbour in @tbl.
 * Holds tbl->lock for read to keep the hash table stable against resizes
 * while walking the RCU-protected bucket chains; callbacks therefore must
 * not sleep or re-take the table lock.  (Some lines are elided here.)
 */
3037 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3040 struct neigh_hash_table *nht;
3043 nht = rcu_dereference_bh(tbl->nht);
3045 read_lock(&tbl->lock); /* avoid resizes */
3046 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3047 struct neighbour *n;
3049 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
3051 n = rcu_dereference_bh(n->next))
3054 read_unlock(&tbl->lock);
3055 rcu_read_unlock_bh();
3057 EXPORT_SYMBOL(neigh_for_each);
3059 /* The tbl->lock must be held as a writer and BH disabled. */
/*
 * __neigh_for_each_release - walk every hash chain, asking @cb whether each
 * entry should be released; entries @cb selects are unlinked from the chain
 * (under n->lock) and then cleaned up/released outside that lock.
 * NOTE(review): the @cb decision/else branch lines are elided in this view.
 */
3060 void __neigh_for_each_release(struct neigh_table *tbl,
3061 int (*cb)(struct neighbour *))
3064 struct neigh_hash_table *nht;
/* Writer-held tbl->lock lets us dereference nht without RCU read-side. */
3066 nht = rcu_dereference_protected(tbl->nht,
3067 lockdep_is_held(&tbl->lock));
3068 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3069 struct neighbour *n;
3070 struct neighbour __rcu **np;
3072 np = &nht->hash_buckets[chain];
3073 while ((n = rcu_dereference_protected(*np,
3074 lockdep_is_held(&tbl->lock))) != NULL) {
3077 write_lock(&n->lock);
/* Unlink n: point the previous link at n->next. */
3080 rcu_assign_pointer(*np,
3081 rcu_dereference_protected(n->next,
3082 lockdep_is_held(&tbl->lock)));
3086 write_unlock(&n->lock);
/* Cleanup runs after dropping n->lock to keep lock ordering simple. */
3088 neigh_cleanup_and_release(n);
3092 EXPORT_SYMBOL(__neigh_for_each_release);
/*
 * neigh_xmit - transmit @skb to @addr on @dev using the neighbour table
 * selected by @index.  For resolvable tables (ARP/ND) the neighbour is
 * looked up (created on demand) and its output method used; for
 * NEIGH_LINK_TABLE the L2 header is built directly and the skb queued.
 * Returns a negative errno on failure (-EAFNOSUPPORT for a bad index).
 * NOTE(review): some lines (rcu_read_lock_bh, error labels) are elided.
 */
3094 int neigh_xmit(int index, struct net_device *dev,
3095 const void *addr, struct sk_buff *skb)
3097 int err = -EAFNOSUPPORT;
3098 if (likely(index < NEIGH_NR_TABLES)) {
3099 struct neigh_table *tbl;
3100 struct neighbour *neigh;
3102 tbl = neigh_tables[index];
/* IPv4 fast path: keyed lookup avoids the generic hash walk. */
3106 if (index == NEIGH_ARP_TABLE) {
3107 u32 key = *((u32 *)addr);
3109 neigh = __ipv4_neigh_lookup_noref(dev, key);
3111 neigh = __neigh_lookup_noref(tbl, addr, dev);
/* Not cached yet: create the entry before transmitting. */
3114 neigh = __neigh_create(tbl, addr, dev, false);
3115 err = PTR_ERR(neigh);
3116 if (IS_ERR(neigh)) {
3117 rcu_read_unlock_bh();
3120 err = neigh->output(neigh, skb);
3121 rcu_read_unlock_bh();
3123 else if (index == NEIGH_LINK_TABLE) {
/* No resolution needed: build the hard header and send directly. */
3124 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3125 addr, NULL, skb->len);
3128 err = dev_queue_xmit(skb);
3136 EXPORT_SYMBOL(neigh_xmit);
3138 #ifdef CONFIG_PROC_FS
/*
 * neigh_get_first - /proc seq helper: find the first neighbour entry that
 * matches the iterator's netns and flag filters, starting from bucket 0.
 * Records the bucket reached in state->bucket.  (Loop-control lines are
 * elided in this view.)
 */
3140 static struct neighbour *neigh_get_first(struct seq_file *seq)
3142 struct neigh_seq_state *state = seq->private;
3143 struct net *net = seq_file_net(seq);
3144 struct neigh_hash_table *nht = state->nht;
3145 struct neighbour *n = NULL;
3148 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3149 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3150 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3153 if (!net_eq(dev_net(n->dev), net))
/* Protocol-specific sub-iterator (e.g. clones) gets first refusal. */
3155 if (state->neigh_sub_iter) {
3159 v = state->neigh_sub_iter(state, n, &fakep);
3163 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
/* With SKIP_NOARP, only entries in a real NUD state are shown. */
3165 if (n->nud_state & ~NUD_NOARP)
3168 n = rcu_dereference_bh(n->next);
3174 state->bucket = bucket;
/*
 * neigh_get_next - /proc seq helper: advance from @n to the next matching
 * neighbour, moving to subsequent hash buckets as chains run out.  With a
 * NULL @pos the caller just wants the next element without position
 * accounting.  (Several loop-control lines are elided in this view.)
 */
3179 static struct neighbour *neigh_get_next(struct seq_file *seq,
3180 struct neighbour *n,
3183 struct neigh_seq_state *state = seq->private;
3184 struct net *net = seq_file_net(seq);
3185 struct neigh_hash_table *nht = state->nht;
/* Let the protocol sub-iterator consume positions inside @n first. */
3187 if (state->neigh_sub_iter) {
3188 void *v = state->neigh_sub_iter(state, n, pos);
3192 n = rcu_dereference_bh(n->next);
3196 if (!net_eq(dev_net(n->dev), net))
3198 if (state->neigh_sub_iter) {
3199 void *v = state->neigh_sub_iter(state, n, pos);
3204 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3207 if (n->nud_state & ~NUD_NOARP)
3210 n = rcu_dereference_bh(n->next);
/* Chain exhausted: move on to the next hash bucket. */
3216 if (++state->bucket >= (1 << nht->hash_shift))
3219 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
/*
 * neigh_get_idx - position the neighbour iterator at offset *pos by
 * stepping from the first entry; returns NULL when *pos is past the end.
 * (The decrement-loop lines are elided in this view.)
 */
3227 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3229 struct neighbour *n = neigh_get_first(seq);
3234 n = neigh_get_next(seq, n, pos);
3239 return *pos ? NULL : n;
/*
 * pneigh_get_first - /proc seq helper: first proxy-neighbour entry in the
 * iterator's netns, scanning the pneigh hash buckets from 0.  Sets the
 * IS_PNEIGH flag so neigh_seq_next() knows which phase the walk is in.
 */
3242 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3244 struct neigh_seq_state *state = seq->private;
3245 struct net *net = seq_file_net(seq);
3246 struct neigh_table *tbl = state->tbl;
3247 struct pneigh_entry *pn = NULL;
3250 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3251 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3252 pn = tbl->phash_buckets[bucket];
/* Skip entries belonging to other network namespaces. */
3253 while (pn && !net_eq(pneigh_net(pn), net))
3258 state->bucket = bucket;
/*
 * pneigh_get_next - advance the proxy-neighbour iterator from @pn, walking
 * the current chain and then later buckets, netns-filtered throughout.
 * (Chain-advance and position-accounting lines are elided in this view.)
 */
3263 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3264 struct pneigh_entry *pn,
3267 struct neigh_seq_state *state = seq->private;
3268 struct net *net = seq_file_net(seq);
3269 struct neigh_table *tbl = state->tbl;
3273 } while (pn && !net_eq(pneigh_net(pn), net));
/* Current chain done: try the remaining buckets. */
3276 if (++state->bucket > PNEIGH_HASHMASK)
3278 pn = tbl->phash_buckets[state->bucket];
3279 while (pn && !net_eq(pneigh_net(pn), net))
/*
 * pneigh_get_idx - position the proxy-neighbour iterator at offset *pos;
 * mirrors neigh_get_idx() for the pneigh phase of the combined walk.
 */
3291 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3293 struct pneigh_entry *pn = pneigh_get_first(seq);
3298 pn = pneigh_get_next(seq, pn, pos);
3303 return *pos ? NULL : pn;
/*
 * neigh_get_idx_any - seek to position *pos across the combined walk:
 * regular neighbours first, then (unless NEIGH_SEQ_NEIGH_ONLY) the proxy
 * entries, using the leftover offset from the first phase.
 */
3306 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3308 struct neigh_seq_state *state = seq->private;
3310 loff_t idxpos = *pos;
3312 rc = neigh_get_idx(seq, &idxpos);
3313 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3314 rc = pneigh_get_idx(seq, &idxpos);
/*
 * neigh_seq_start - common seq_file ->start for protocol /proc files
 * (e.g. /proc/net/arp).  Takes the RCU BH read lock and tbl->lock for
 * read; both are dropped in neigh_seq_stop().  Returns SEQ_START_TOKEN
 * for position 0 so ->show can print a header line.
 */
3319 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3320 __acquires(tbl->lock)
3323 struct neigh_seq_state *state = seq->private;
3327 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3330 state->nht = rcu_dereference_bh(tbl->nht);
3331 read_lock(&tbl->lock);
3333 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3335 EXPORT_SYMBOL(neigh_seq_start);
/*
 * neigh_seq_next - common seq_file ->next: after the header token comes
 * the first neighbour; within the neighbour phase advance normally and
 * fall through to the pneigh phase when neighbours are exhausted (unless
 * NEIGH_SEQ_NEIGH_ONLY).  (Position bookkeeping lines are elided here.)
 */
3337 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3339 struct neigh_seq_state *state;
3342 if (v == SEQ_START_TOKEN) {
3343 rc = neigh_get_first(seq);
3347 state = seq->private;
3348 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3349 rc = neigh_get_next(seq, v, NULL);
/* Neighbour phase finished: switch to proxy entries if allowed. */
3352 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3353 rc = pneigh_get_first(seq);
3355 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3356 rc = pneigh_get_next(seq, v, NULL);
3362 EXPORT_SYMBOL(neigh_seq_next);
/*
 * neigh_seq_stop - common seq_file ->stop: release the table read lock
 * and the RCU BH read section taken in neigh_seq_start().
 */
3364 void neigh_seq_stop(struct seq_file *seq, void *v)
3365 __releases(tbl->lock)
3368 struct neigh_seq_state *state = seq->private;
3369 struct neigh_table *tbl = state->tbl;
3371 read_unlock(&tbl->lock);
3372 rcu_read_unlock_bh();
3374 EXPORT_SYMBOL(neigh_seq_stop);
3376 /* statistics via seq_file */
/*
 * neigh_stat_seq_start - seq_file ->start for the per-table statistics
 * file: position 0 yields the header token, positions N>0 map to the
 * (N-1)'th possible CPU's stats.  (Some control lines are elided here.)
 */
3378 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3380 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3384 return SEQ_START_TOKEN;
3386 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3387 if (!cpu_possible(cpu))
3390 return per_cpu_ptr(tbl->stats, cpu);
/*
 * neigh_stat_seq_next - advance to the next possible CPU's per-cpu stats;
 * returns NULL once all CPUs have been visited.
 */
3395 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3397 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3400 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3401 if (!cpu_possible(cpu))
3404 return per_cpu_ptr(tbl->stats, cpu);
/* neigh_stat_seq_stop - nothing to release; stats need no locking here. */
3410 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
/*
 * neigh_stat_seq_show - print the header line (for SEQ_START_TOKEN) or one
 * CPU's statistics row; the column order must match the header string.
 * (Several printf-argument lines are elided in this view.)
 */
3415 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3417 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3418 struct neigh_statistics *st = v;
3420 if (v == SEQ_START_TOKEN) {
3421 seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3425 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3426 "%08lx %08lx %08lx "
3427 "%08lx %08lx %08lx\n",
3428 atomic_read(&tbl->entries),
3439 st->rcv_probes_mcast,
3440 st->rcv_probes_ucast,
3442 st->periodic_gc_runs,
/* seq_file operations for the per-table /proc statistics file. */
3451 static const struct seq_operations neigh_stat_seq_ops = {
3452 .start = neigh_stat_seq_start,
3453 .next = neigh_stat_seq_next,
3454 .stop = neigh_stat_seq_stop,
3455 .show = neigh_stat_seq_show,
3457 #endif /* CONFIG_PROC_FS */
/*
 * __neigh_notify - multicast a neighbour event of @type to the
 * RTNLGRP_NEIGH group.  GFP_ATOMIC because callers may hold locks or run
 * in softirq context.  On failure, listeners are told via
 * rtnl_set_sk_err() so they can resync.  (Error labels elided here.)
 */
3459 static void __neigh_notify(struct neighbour *n, int type, int flags,
3462 struct net *net = dev_net(n->dev);
3463 struct sk_buff *skb;
3466 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3470 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3472 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3473 WARN_ON(err == -EMSGSIZE);
3477 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3481 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
/*
 * neigh_app_ns - ask userspace resolvers for help with @n by multicasting
 * an RTM_GETNEIGH request (used when app_probes is configured).
 */
3484 void neigh_app_ns(struct neighbour *n)
3486 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3488 EXPORT_SYMBOL(neigh_app_ns);
3490 #ifdef CONFIG_SYSCTL
/* Cap for the legacy packet-count sysctl so the byte conversion below
 * cannot overflow an int. */
3491 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
/*
 * proc_unres_qlen - sysctl handler for the legacy "unres_qlen" knob, which
 * counts packets while the backing variable stores bytes: convert
 * bytes->packets for reads and packets->bytes after a write, clamped to
 * [0, unres_qlen_max].  (Some lines are elided in this view.)
 */
3493 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3494 void *buffer, size_t *lenp, loff_t *ppos)
3497 struct ctl_table tmp = *ctl;
3499 tmp.extra1 = SYSCTL_ZERO;
3500 tmp.extra2 = &unres_qlen_max;
3503 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3504 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3507 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
/*
 * neigh_get_dev_parms_rcu - fetch @dev's per-device neigh_parms for the
 * given address family (ARP or ND); caller must be in an RCU read section.
 * (The family switch lines are elided in this view.)
 */
3511 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3516 return __in_dev_arp_parms_get_rcu(dev);
3518 return __in6_dev_nd_parms_get_rcu(dev);
/*
 * neigh_copy_dflt_parms - propagate a changed default parameter to every
 * device in @net whose own value for @index was never explicitly set
 * (tracked via the data_state bitmap), so unmodified devices follow the
 * default.  (RCU lock lines are elided in this view.)
 */
3523 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3526 struct net_device *dev;
3527 int family = neigh_parms_family(p);
3530 for_each_netdev_rcu(net, dev) {
3531 struct neigh_parms *dst_p =
3532 neigh_get_dev_parms_rcu(dev, family);
3534 if (dst_p && !test_bit(index, dst_p->data_state))
3535 dst_p->data[index] = p->data[index];
/*
 * neigh_proc_update - post-write hook shared by the neigh sysctl handlers:
 * mark the parameter as explicitly set, fire the netevent for
 * DELAY_PROBE_TIME consumers (e.g. switchdev drivers), and, when the
 * default table entry changed (dev == NULL), push the new value to
 * devices still tracking the default.
 */
3540 static void neigh_proc_update(struct ctl_table *ctl, int write)
3542 struct net_device *dev = ctl->extra1;
3543 struct neigh_parms *p = ctl->extra2;
3544 struct net *net = neigh_parms_net(p);
/* Recover the NEIGH_VAR index from the data pointer's offset. */
3545 int index = (int *) ctl->data - p->data;
3550 set_bit(index, p->data_state);
3551 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3552 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3553 if (!dev) /* NULL dev means this is default value */
3554 neigh_copy_dflt_parms(net, p, index);
/*
 * neigh_proc_dointvec_zero_intmax - int sysctl handler clamped to
 * [0, INT_MAX] via a stack copy of the table, then the common
 * neigh_proc_update() post-processing.
 */
3557 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3558 void *buffer, size_t *lenp,
3561 struct ctl_table tmp = *ctl;
3564 tmp.extra1 = SYSCTL_ZERO;
3565 tmp.extra2 = SYSCTL_INT_MAX;
3567 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3568 neigh_proc_update(ctl, write);
/*
 * neigh_proc_dointvec_ms_jiffies_positive - ms-to-jiffies sysctl handler
 * that enforces a strictly positive value (minimum of 1 ms in jiffies).
 * (The extra1/extra2 assignment lines are elided in this view.)
 */
3572 static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write,
3573 void *buffer, size_t *lenp, loff_t *ppos)
3575 struct ctl_table tmp = *ctl;
3578 int min = msecs_to_jiffies(1);
3583 ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3584 neigh_proc_update(ctl, write);
/* Plain int sysctl handler plus the common neigh post-write hook. */
3588 int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
3589 size_t *lenp, loff_t *ppos)
3591 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3593 neigh_proc_update(ctl, write);
3596 EXPORT_SYMBOL(neigh_proc_dointvec);
/* Seconds-to-jiffies sysctl handler plus the common neigh hook. */
3598 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
3599 size_t *lenp, loff_t *ppos)
3601 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3603 neigh_proc_update(ctl, write);
3606 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
/* USER_HZ-to-jiffies sysctl handler plus the common neigh hook. */
3608 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3609 void *buffer, size_t *lenp,
3612 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3614 neigh_proc_update(ctl, write);
/* Milliseconds-to-jiffies sysctl handler plus the common neigh hook. */
3618 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3619 void *buffer, size_t *lenp, loff_t *ppos)
3621 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3623 neigh_proc_update(ctl, write);
3626 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
/* Legacy packets<->bytes unres_qlen handler plus the common neigh hook. */
3628 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3629 void *buffer, size_t *lenp,
3632 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3634 neigh_proc_update(ctl, write);
/*
 * neigh_proc_base_reachable_time - handler for base_reachable_time(_ms)
 * that also refreshes p->reachable_time immediately after a successful
 * write, instead of waiting for neigh_periodic_work to re-randomize it.
 * Dispatches on the procname to pick seconds vs milliseconds conversion.
 */
3638 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3639 void *buffer, size_t *lenp,
3642 struct neigh_parms *p = ctl->extra2;
3645 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3646 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3647 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3648 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3652 if (write && ret == 0) {
3653 /* update reachable_time as well, otherwise, the change will
3654 * only be effective after the next time neigh_periodic_work
3655 * decides to recompute it
3658 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* Offset of data[index] within struct neigh_parms; fixed up to a real
 * pointer in neigh_sysctl_register() by adding the parms base address. */
3663 #define NEIGH_PARMS_DATA_OFFSET(index) \
3664 (&((struct neigh_parms *) 0)->data[index])
/* Build one ctl_table entry for NEIGH_VAR_<attr>; @data_attr lets legacy
 * aliases (e.g. retrans_time_ms) share another attribute's storage. */
3666 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3667 [NEIGH_VAR_ ## attr] = { \
3669 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3670 .maxlen = sizeof(int), \
3672 .proc_handler = proc, \
/* Convenience wrappers pairing each attribute with its unit handler. */
3675 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3676 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3678 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3679 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3681 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3682 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3684 #define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
3685 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
3687 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3688 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3690 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3691 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
/*
 * Template sysctl table, kmemdup'ed per device/default registration in
 * neigh_sysctl_register().  Per-parms entries come first; the table-wide
 * GC knobs at the end are only exposed for the "default" registration.
 */
3693 static struct neigh_sysctl_table {
3694 struct ctl_table_header *sysctl_header;
3695 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3696 } neigh_sysctl_template __read_mostly = {
3698 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3699 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3700 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3701 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3702 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3703 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3704 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3705 NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
3706 "interval_probe_time_ms"),
3707 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3708 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3709 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3710 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3711 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3712 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
/* Legacy aliases sharing storage with the canonical attributes above. */
3713 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3714 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3715 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
/* Table-wide GC knobs; .data is filled in at registration time. */
3716 [NEIGH_VAR_GC_INTERVAL] = {
3717 .procname = "gc_interval",
3718 .maxlen = sizeof(int),
3720 .proc_handler = proc_dointvec_jiffies,
3722 [NEIGH_VAR_GC_THRESH1] = {
3723 .procname = "gc_thresh1",
3724 .maxlen = sizeof(int),
3726 .extra1 = SYSCTL_ZERO,
3727 .extra2 = SYSCTL_INT_MAX,
3728 .proc_handler = proc_dointvec_minmax,
3730 [NEIGH_VAR_GC_THRESH2] = {
3731 .procname = "gc_thresh2",
3732 .maxlen = sizeof(int),
3734 .extra1 = SYSCTL_ZERO,
3735 .extra2 = SYSCTL_INT_MAX,
3736 .proc_handler = proc_dointvec_minmax,
3738 [NEIGH_VAR_GC_THRESH3] = {
3739 .procname = "gc_thresh3",
3740 .maxlen = sizeof(int),
3742 .extra1 = SYSCTL_ZERO,
3743 .extra2 = SYSCTL_INT_MAX,
3744 .proc_handler = proc_dointvec_minmax,
/*
 * neigh_sysctl_register - clone the sysctl template for one device (or the
 * per-table default when @dev is NULL), bind its entries to @p's data,
 * install the protocol-specific @handler on the time knobs, and register
 * the table under net/<proto>/neigh/<dev|default>/.
 * NOTE(review): several lines (error labels, family switch cases) are
 * elided in this view.
 */
3750 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3751 proc_handler *handler)
3754 struct neigh_sysctl_table *t;
3755 const char *dev_name_source;
3756 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3759 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
/* Turn the template's offsets into real pointers into @p. */
3763 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3764 t->neigh_vars[i].data += (long) p;
3765 t->neigh_vars[i].extra1 = dev;
3766 t->neigh_vars[i].extra2 = p;
3770 dev_name_source = dev->name;
3771 /* Terminate the table early */
3772 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3773 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3775 struct neigh_table *tbl = p->tbl;
/* Default registration: also expose the table-wide GC knobs. */
3776 dev_name_source = "default";
3777 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3778 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3779 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3780 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
/* Protocol-supplied handler overrides for the time-valued knobs. */
3785 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3787 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3788 /* RetransTime (in milliseconds)*/
3789 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3790 /* ReachableTime (in milliseconds) */
3791 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3793 /* Those handlers will update p->reachable_time after
3794 * base_reachable_time(_ms) is set to ensure the new timer starts being
3795 * applied after the next neighbour update instead of waiting for
3796 * neigh_periodic_work to update its value (can be multiple minutes)
3797 * So any handler that replaces them should do this as well
3800 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3801 neigh_proc_base_reachable_time;
3802 /* ReachableTime (in milliseconds) */
3803 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3804 neigh_proc_base_reachable_time;
3807 switch (neigh_parms_family(p)) {
3818 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3819 p_name, dev_name_source);
3821 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3822 if (!t->sysctl_header)
3825 p->sysctl_table = t;
3833 EXPORT_SYMBOL(neigh_sysctl_register);
/*
 * neigh_sysctl_unregister - tear down the table installed by
 * neigh_sysctl_register(); clearing p->sysctl_table first keeps the
 * pointer from being seen half-torn-down.  (kfree line elided here.)
 */
3835 void neigh_sysctl_unregister(struct neigh_parms *p)
3837 if (p->sysctl_table) {
3838 struct neigh_sysctl_table *t = p->sysctl_table;
3839 p->sysctl_table = NULL;
3840 unregister_net_sysctl_table(t->sysctl_header);
3844 EXPORT_SYMBOL(neigh_sysctl_unregister);
3846 #endif /* CONFIG_SYSCTL */
/*
 * neigh_init - register the PF_UNSPEC rtnetlink handlers for neighbour
 * add/delete/get-dump and neighbour-table dump/set at subsys init time.
 */
3848 static int __init neigh_init(void)
3850 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3851 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3852 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3854 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3856 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3861 subsys_initcall(neigh_init);