// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     That would result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever should be done under neigh->lock;
   the most complicated operation we allow there is dev->hard_header.
   dev->hard_header is assumed to be simple and must not call back
   into the neighbour tables.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a genuinely reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
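
/* Worked example (illustrative only, not part of the code above): with a
 * base_reachable_time of 30 * HZ, neigh_rand_reach_time() returns a value
 * uniformly distributed in [15 * HZ, 45 * HZ), i.e. 15..45 seconds. The
 * jitter decorrelates re-validation of entries that were created together.
 */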
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
	if (!list_empty(&n->managed_list))
		list_del_init(&n->managed_list);
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}
out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static void neigh_update_managed_list(struct neighbour *n)
{
	bool on_managed_list, add_to_managed;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	add_to_managed = n->flags & NTF_MANAGED;
	on_managed_list = !list_empty(&n->managed_list);

	if (!add_to_managed && on_managed_list)
		list_del_init(&n->managed_list);
	else if (add_to_managed && !on_managed_list)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}
static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
			       bool *gc_update, bool *managed_update)
{
	u32 ndm_flags, old_flags = neigh->flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return;

	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;

	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		*notify = 1;
		*gc_update = true;
	}
	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
		if (ndm_flags & NTF_MANAGED)
			neigh->flags |= NTF_MANAGED;
		else
			neigh->flags &= ~NTF_MANAGED;
		*notify = 1;
		*managed_update = true;
	}
}

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    time_after(tref, n->updated))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
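
/* Summary of the thresholds used above (descriptive, not new policy):
 * gc_thresh2 is the soft limit that forced GC tries to shrink back to, and
 * entries updated within the last 5 seconds (tref) or still referenced are
 * spared. E.g. with gc_thresh2 = 512 and gc_entries = 600, max_clean is 88.
 */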
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u32 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	n->flags	  = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
	INIT_LIST_HEAD(&n->gc_list);
	INIT_LIST_HEAD(&n->managed_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
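
/* Sketch of the rehash above, assuming a grow from shift 3 to shift 4:
 * every entry is re-bucketed under the new shift, i.e.
 *
 *	hash = tbl->hash(key, dev, new_nht->hash_rnd) >> (32 - 4);
 *
 * Readers traversing the old table under RCU keep a consistent view until
 * call_rcu() frees it; only writers hold tbl->lock during the move.
 */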
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u32 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;
	int error;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold_track(dev, &n->dev_tracker, GFP_ATOMIC);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
	if (n->flags & NTF_MANAGED)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
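
/* Worked example of pneigh_hash() (illustrative): the last four bytes of the
 * key are folded as
 *
 *	h = *(u32 *)(pkey + key_len - 4);
 *	h ^= h >> 16; h ^= h >> 8; h ^= h >> 4;
 *	h &= PNEIGH_HASHMASK;
 *
 * leaving a 4-bit index, so proxy entries spread over the 16
 * phash_buckets chains.
 */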
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	dev_hold_track(dev, &n->dev_tracker, GFP_KERNEL);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		dev_put_track(dev, &n->dev_tracker);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			dev_put_track(n->dev, &n->dev_tracker);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		dev_put_track(n->dev, &n->dev_tracker);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put_track(dev, &neigh->dev_tracker);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
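
/* Example (illustrative, using the common ARP defaults ucast_probes = 3,
 * app_probes = 0, mcast_probes = 3): an entry in NUD_INCOMPLETE gives up
 * after 3 + 0 + 3 = 6 solicitations; once in NUD_PROBE, mcast_reprobes
 * (default 0) is used in place of mcast_probes.
 */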
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very thin place. error_report() is a very complicated
	   routine. In particular, it can hit the same neighbour entry!

	   So we try to be careful and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
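
/* Summary of the transitions driven above (descriptive only):
 *
 *   NUD_REACHABLE --confirmation aged--> NUD_DELAY or NUD_STALE
 *   NUD_DELAY     --confirmation seen--> NUD_REACHABLE
 *   NUD_DELAY     --no confirmation----> NUD_PROBE
 *   NUD_PROBE / NUD_INCOMPLETE --probes exhausted--> NUD_FAILED
 *
 * Timers are re-armed no closer than HZ/100 in the future to bound the
 * reschedule frequency.
 */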
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
		       const bool immediate_ok)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			if (!immediate_ok) {
				next = now + 1;
			} else {
				immediate_probe = true;
				next = now + max(NEIGH_VAR(neigh->parms,
							   RETRANS_TIME),
						 HZ / 100);
			}
			neigh_add_timer(neigh, next);
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool gc_update = false, managed_update = false;
	int update_isrouter = 0;
	struct net_device *dev;
	int err, notify = 0;
	u8 old;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
		new = old & ~NUD_PERMANENT;
		neigh->nud_state = new;
		err = 0;
		goto out;
	}

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);
	if (((new ^ old) & NUD_PERMANENT) || gc_update)
		neigh_update_gc_list(neigh);
	if (managed_update)
		neigh_update_managed_list(neigh);
	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);
	trace_neigh_update_done(neigh, err);
	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
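
/* Example caller (a sketch of an ARP-like flow, not a new API): a received
 * reply confirming a resolved address would typically do
 *
 *	neigh_update(n, lladdr, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_OVERRIDE, 0);
 *
 * while an administrative replace from netlink adds NEIGH_UPDATE_F_ADMIN,
 * so the NUD_NOARP/NUD_PERMANENT guard above does not reject it.
 */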
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
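
/* The dev_hard_header() loops above use neigh->ha_lock as a seqlock: the
 * link-layer header is rebuilt if the lladdr changed mid-copy. A reader
 * sketch of the pattern:
 *
 *	do {
 *		seq = read_seqbegin(&neigh->ha_lock);
 *		... copy neigh->ha into the header ...
 *	} while (read_seqretry(&neigh->ha_lock, seq));
 *
 * so no lock is held across dev_queue_xmit().
 */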
static void neigh_managed_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table,
					       managed_work.work);
	struct neighbour *neigh;

	write_lock_bh(&tbl->lock);
	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
		neigh_event_send_probe(neigh, NULL, false);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
			   NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
	write_unlock_bh(&tbl->lock);
}

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long sched_next = jiffies +
			prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put_track(dev, &p->dev_tracker);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	dev_put_track(parms->dev, &parms->dev_tracker);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	INIT_LIST_HEAD(&tbl->managed_list);

	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);

	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);

	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->managed_work);
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
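
/* Typical usage (as done by the protocols, e.g. ARP):
 *
 *	neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);
 *	...
 *	neigh_table_clear(NEIGH_ARP_TABLE, &arp_tbl);
 *
 * neigh_table_clear() is only safe once no entries remain; it complains
 * with "neighbour leakage" otherwise.
 */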
static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	u32 ndm_flags;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	ndm_flags = ndm->ndm_flags;
	if (tb[NDA_FLAGS_EXT]) {
		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);

		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
			      hweight32(NTF_EXT_MASK)));
		ndm_flags |= (ext << NTF_EXT_SHIFT);
	}
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		if (ndm_flags & NTF_MANAGED) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
			goto out;
		}

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
		bool exempt_from_gc = ndm_permanent ||
				      ndm_flags & NTF_EXT_LEARNED;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}
		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
			err = -EINVAL;
			goto out;
		}

		neigh = ___neigh_create(tbl, dst, dev,
					ndm_flags &
					(NTF_EXT_LEARNED | NTF_MANAGED),
					exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (protocol)
		neigh->protocol = protocol;
	if (ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
	if (ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;
	if (ndm_flags & NTF_MANAGED)
		flags |= NEIGH_UPDATE_F_MANAGED;
	if (ndm_flags & NTF_USE)
		flags |= NEIGH_UPDATE_F_USE;

	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
			     NETLINK_CB(skb).portid, extack);
	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
		neigh_event_send(neigh, NULL);
		err = 0;
	}
	neigh_release(neigh);
out:
	return err;
}
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		long flush_delta = now - tbl->last_flush;
		long rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2422 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2423 struct netlink_ext_ack *extack)
2424 {
2425 struct ndtmsg *ndtm;
2427 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2428 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2429 return -EINVAL;
2430 }
2432 ndtm = nlmsg_data(nlh);
2433 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2434 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2435 return -EINVAL;
2436 }
2438 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2439 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2440 return -EINVAL;
2441 }
2443 return 0;
2444 }
2446 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2447 {
2448 const struct nlmsghdr *nlh = cb->nlh;
2449 struct net *net = sock_net(skb->sk);
2450 int family, tidx, nidx = 0;
2451 int tbl_skip = cb->args[0];
2452 int neigh_skip = cb->args[1];
2453 struct neigh_table *tbl;
2455 if (cb->strict_check) {
2456 int err = neightbl_valid_dump_info(nlh, cb->extack);
2458 if (err < 0)
2459 return err;
2460 }
2462 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2464 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2465 struct neigh_parms *p;
2467 tbl = neigh_tables[tidx];
2468 if (!tbl)
2469 continue;
2471 if (tidx < tbl_skip || (family && tbl->family != family))
2472 continue;
2474 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2475 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2476 NLM_F_MULTI) < 0)
2477 break;
2479 nidx = 0;
2480 p = list_next_entry(&tbl->parms, list);
2481 list_for_each_entry_from(p, &tbl->parms_list, list) {
2482 if (!net_eq(neigh_parms_net(p), net))
2483 continue;
2485 if (nidx < neigh_skip)
2486 goto next;
2488 if (neightbl_fill_param_info(skb, tbl, p,
2489 NETLINK_CB(cb->skb).portid,
2490 nlh->nlmsg_seq,
2491 RTM_NEWNEIGHTBL,
2492 NLM_F_MULTI) < 0)
2493 goto out;
2494 next:
2495 nidx++;
2496 }
2498 neigh_skip = 0;
2499 }
2500 out:
2501 cb->args[0] = tidx;
2502 cb->args[1] = nidx;
2504 return skb->len;
2505 }
2507 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2508 u32 pid, u32 seq, int type, unsigned int flags)
2509 {
2510 u32 neigh_flags, neigh_flags_ext;
2511 unsigned long now = jiffies;
2512 struct nda_cacheinfo ci;
2513 struct nlmsghdr *nlh;
2514 struct ndmsg *ndm;
2516 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2517 if (nlh == NULL)
2518 return -EMSGSIZE;
2520 neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2521 neigh_flags = neigh->flags & NTF_OLD_MASK;
2523 ndm = nlmsg_data(nlh);
2524 ndm->ndm_family = neigh->ops->family;
2525 ndm->ndm_pad1 = 0;
2526 ndm->ndm_pad2 = 0;
2527 ndm->ndm_flags = neigh_flags;
2528 ndm->ndm_type = neigh->type;
2529 ndm->ndm_ifindex = neigh->dev->ifindex;
2531 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2532 goto nla_put_failure;
2534 read_lock_bh(&neigh->lock);
2535 ndm->ndm_state = neigh->nud_state;
2536 if (neigh->nud_state & NUD_VALID) {
2537 char haddr[MAX_ADDR_LEN];
2539 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2540 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2541 read_unlock_bh(&neigh->lock);
2542 goto nla_put_failure;
2543 }
2544 }
2546 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2547 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2548 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2549 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2550 read_unlock_bh(&neigh->lock);
2552 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2553 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2554 goto nla_put_failure;
2556 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2557 goto nla_put_failure;
2558 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2559 goto nla_put_failure;
2561 nlmsg_end(skb, nlh);
2562 return 0;
2564 nla_put_failure:
2565 nlmsg_cancel(skb, nlh);
2566 return -EMSGSIZE;
2567 }
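/* Proxy entries carry no link-layer state of their own, so the fill
 * routine below reports them as NUD_NONE with the NTF_PROXY flag set.
 */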
2569 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2570 u32 pid, u32 seq, int type, unsigned int flags,
2571 struct neigh_table *tbl)
2572 {
2573 u32 neigh_flags, neigh_flags_ext;
2574 struct nlmsghdr *nlh;
2575 struct ndmsg *ndm;
2577 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2578 if (nlh == NULL)
2579 return -EMSGSIZE;
2581 neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
2582 neigh_flags = pn->flags & NTF_OLD_MASK;
2584 ndm = nlmsg_data(nlh);
2585 ndm->ndm_family = tbl->family;
2586 ndm->ndm_pad1 = 0;
2587 ndm->ndm_pad2 = 0;
2588 ndm->ndm_flags = neigh_flags | NTF_PROXY;
2589 ndm->ndm_type = RTN_UNICAST;
2590 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2591 ndm->ndm_state = NUD_NONE;
2593 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2594 goto nla_put_failure;
2596 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2597 goto nla_put_failure;
2598 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2599 goto nla_put_failure;
2601 nlmsg_end(skb, nlh);
2602 return 0;
2604 nla_put_failure:
2605 nlmsg_cancel(skb, nlh);
2606 return -EMSGSIZE;
2607 }
2609 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2610 {
2611 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2612 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2613 }
2615 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2616 {
2617 struct net_device *master;
2619 if (!master_idx)
2620 return false;
2622 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2624 /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
2625 * invalid value for ifindex to denote "no master".
2626 */
2627 if (master_idx == -1)
2628 return !!master;
2630 if (!master || master->ifindex != master_idx)
2631 return true;
2633 return false;
2634 }
2636 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2637 {
2638 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2639 return true;
2641 return false;
2642 }
2644 struct neigh_dump_filter {
2645 int master_idx;
2646 int dev_idx;
2647 };
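/* Filled from the NDA_MASTER and NDA_IFINDEX attributes of a dump
 * request; zero means "no filtering" for the respective field.
 */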
2649 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2650 struct netlink_callback *cb,
2651 struct neigh_dump_filter *filter)
2652 {
2653 struct net *net = sock_net(skb->sk);
2654 struct neighbour *n;
2655 int rc, h, s_h = cb->args[1];
2656 int idx, s_idx = idx = cb->args[2];
2657 struct neigh_hash_table *nht;
2658 unsigned int flags = NLM_F_MULTI;
2660 if (filter->dev_idx || filter->master_idx)
2661 flags |= NLM_F_DUMP_FILTERED;
2663 rcu_read_lock_bh();
2664 nht = rcu_dereference_bh(tbl->nht);
2666 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2667 if (h > s_h)
2668 s_idx = 0;
2669 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2670 n != NULL;
2671 n = rcu_dereference_bh(n->next)) {
2672 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2673 goto next;
2674 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2675 neigh_master_filtered(n->dev, filter->master_idx))
2676 goto next;
2677 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2678 cb->nlh->nlmsg_seq,
2679 RTM_NEWNEIGH,
2680 flags) < 0) {
2681 rc = -1;
2682 goto out;
2683 }
2684 next:
2685 idx++;
2686 }
2687 }
2688 rc = skb->len;
2689 out:
2690 rcu_read_unlock_bh();
2691 cb->args[1] = h;
2692 cb->args[2] = idx;
2693 return rc;
2694 }
2696 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2697 struct netlink_callback *cb,
2698 struct neigh_dump_filter *filter)
2699 {
2700 struct pneigh_entry *n;
2701 struct net *net = sock_net(skb->sk);
2702 int rc, h, s_h = cb->args[3];
2703 int idx, s_idx = idx = cb->args[4];
2704 unsigned int flags = NLM_F_MULTI;
2706 if (filter->dev_idx || filter->master_idx)
2707 flags |= NLM_F_DUMP_FILTERED;
2709 read_lock_bh(&tbl->lock);
2711 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2712 if (h > s_h)
2713 s_idx = 0;
2714 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2715 if (idx < s_idx || pneigh_net(n) != net)
2716 goto next;
2717 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2718 neigh_master_filtered(n->dev, filter->master_idx))
2719 goto next;
2720 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2721 cb->nlh->nlmsg_seq,
2722 RTM_NEWNEIGH, flags, tbl) < 0) {
2723 read_unlock_bh(&tbl->lock);
2724 rc = -1;
2725 goto out;
2726 }
2727 next:
2728 idx++;
2729 }
2730 }
2732 read_unlock_bh(&tbl->lock);
2733 rc = skb->len;
2734 out:
2735 cb->args[3] = h;
2736 cb->args[4] = idx;
2737 return rc;
2738 }
2741 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2742 bool strict_check,
2743 struct neigh_dump_filter *filter,
2744 struct netlink_ext_ack *extack)
2745 {
2746 struct nlattr *tb[NDA_MAX + 1];
2747 int err, i;
2749 if (strict_check) {
2750 struct ndmsg *ndm;
2752 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2753 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2754 return -EINVAL;
2755 }
2757 ndm = nlmsg_data(nlh);
2758 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2759 ndm->ndm_state || ndm->ndm_type) {
2760 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2761 return -EINVAL;
2762 }
2764 if (ndm->ndm_flags & ~NTF_PROXY) {
2765 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2766 return -EINVAL;
2767 }
2769 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2770 tb, NDA_MAX, nda_policy,
2771 extack);
2772 } else {
2773 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2774 NDA_MAX, nda_policy, extack);
2775 }
2776 if (err < 0)
2777 return err;
2779 for (i = 0; i <= NDA_MAX; ++i) {
2780 if (!tb[i])
2781 continue;
2783 /* all new attributes should require strict_check */
2784 switch (i) {
2785 case NDA_IFINDEX:
2786 filter->dev_idx = nla_get_u32(tb[i]);
2787 break;
2788 case NDA_MASTER:
2789 filter->master_idx = nla_get_u32(tb[i]);
2790 break;
2791 default:
2792 if (strict_check) {
2793 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2794 return -EINVAL;
2795 }
2796 }
2797 }
2799 return 0;
2800 }
2802 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2803 {
2804 const struct nlmsghdr *nlh = cb->nlh;
2805 struct neigh_dump_filter filter = {};
2806 struct neigh_table *tbl;
2807 int t, family, s_t;
2808 int proxy = 0;
2809 int err;
2811 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2813 /* check for full ndmsg structure presence, family member is
2814 * the same for both structures
2815 */
2816 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2817 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2818 proxy = 1;
2820 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2821 if (err < 0 && cb->strict_check)
2822 return err;
2824 s_t = cb->args[0];
2826 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2827 tbl = neigh_tables[t];
2829 if (!tbl)
2830 continue;
2831 if (t < s_t || (family && tbl->family != family))
2832 continue;
2833 if (t > s_t)
2834 memset(&cb->args[1], 0, sizeof(cb->args) -
2835 sizeof(cb->args[0]));
2836 if (proxy)
2837 err = pneigh_dump_table(tbl, skb, cb, &filter);
2838 else
2839 err = neigh_dump_table(tbl, skb, cb, &filter);
2840 if (err < 0)
2841 break;
2842 }
2844 cb->args[0] = t;
2845 return skb->len;
2846 }
2848 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2849 struct neigh_table **tbl,
2850 void **dst, int *dev_idx, u8 *ndm_flags,
2851 struct netlink_ext_ack *extack)
2852 {
2853 struct nlattr *tb[NDA_MAX + 1];
2854 struct ndmsg *ndm;
2855 int err, i;
2857 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2858 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2859 return -EINVAL;
2860 }
2862 ndm = nlmsg_data(nlh);
2863 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2864 ndm->ndm_type) {
2865 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2866 return -EINVAL;
2867 }
2869 if (ndm->ndm_flags & ~NTF_PROXY) {
2870 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2871 return -EINVAL;
2872 }
2874 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2875 NDA_MAX, nda_policy, extack);
2876 if (err < 0)
2877 return err;
2879 *ndm_flags = ndm->ndm_flags;
2880 *dev_idx = ndm->ndm_ifindex;
2881 *tbl = neigh_find_table(ndm->ndm_family);
2882 if (*tbl == NULL) {
2883 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2884 return -EAFNOSUPPORT;
2885 }
2887 for (i = 0; i <= NDA_MAX; ++i) {
2888 if (!tb[i])
2889 continue;
2891 switch (i) {
2892 case NDA_DST:
2893 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2894 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2895 return -EINVAL;
2896 }
2897 *dst = nla_data(tb[i]);
2898 break;
2899 default:
2900 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2901 return -EINVAL;
2902 }
2903 }
2905 return 0;
2906 }
2908 static inline size_t neigh_nlmsg_size(void)
2909 {
2910 return NLMSG_ALIGN(sizeof(struct ndmsg))
2911 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2912 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2913 + nla_total_size(sizeof(struct nda_cacheinfo))
2914 + nla_total_size(4) /* NDA_PROBES */
2915 + nla_total_size(4) /* NDA_FLAGS_EXT */
2916 + nla_total_size(1); /* NDA_PROTOCOL */
2917 }
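/* Sized for the worst case: both addresses at MAX_ADDR_LEN. A message
 * exceeding this estimate would make neigh_fill_info() return
 * -EMSGSIZE, which __neigh_notify() treats as a bug.
 */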
2919 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2920 u32 pid, u32 seq)
2921 {
2922 struct sk_buff *skb;
2923 int err = 0;
2925 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2926 if (!skb)
2927 return -ENOBUFS;
2929 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2930 if (err) {
2931 kfree_skb(skb);
2932 goto errout;
2933 }
2935 err = rtnl_unicast(skb, net, pid);
2936 errout:
2937 return err;
2938 }
2940 static inline size_t pneigh_nlmsg_size(void)
2941 {
2942 return NLMSG_ALIGN(sizeof(struct ndmsg))
2943 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2944 + nla_total_size(4) /* NDA_FLAGS_EXT */
2945 + nla_total_size(1); /* NDA_PROTOCOL */
2946 }
2948 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2949 u32 pid, u32 seq, struct neigh_table *tbl)
2950 {
2951 struct sk_buff *skb;
2952 int err = 0;
2954 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2955 if (!skb)
2956 return -ENOBUFS;
2958 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2959 if (err) {
2960 kfree_skb(skb);
2961 goto errout;
2962 }
2964 err = rtnl_unicast(skb, net, pid);
2965 errout:
2966 return err;
2967 }
2969 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2970 struct netlink_ext_ack *extack)
2971 {
2972 struct net *net = sock_net(in_skb->sk);
2973 struct net_device *dev = NULL;
2974 struct neigh_table *tbl = NULL;
2975 struct neighbour *neigh;
2976 void *dst = NULL;
2977 u8 ndm_flags = 0;
2978 int dev_idx = 0;
2979 int err;
2981 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2982 extack);
2983 if (err < 0)
2984 return err;
2986 if (dev_idx) {
2987 dev = __dev_get_by_index(net, dev_idx);
2988 if (!dev) {
2989 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2990 return -ENODEV;
2991 }
2992 }
2994 if (!dst) {
2995 NL_SET_ERR_MSG(extack, "Network address not specified");
2996 return -EINVAL;
2997 }
2999 if (ndm_flags & NTF_PROXY) {
3000 struct pneigh_entry *pn;
3002 pn = pneigh_lookup(tbl, net, dst, dev, 0);
3003 if (!pn) {
3004 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3005 return -ENOENT;
3006 }
3007 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
3008 nlh->nlmsg_seq, tbl);
3009 }
3011 if (!dev) {
3012 NL_SET_ERR_MSG(extack, "No device specified");
3013 return -EINVAL;
3014 }
3016 neigh = neigh_lookup(tbl, dst, dev);
3017 if (!neigh) {
3018 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3019 return -ENOENT;
3020 }
3022 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
3023 nlh->nlmsg_seq);
3025 neigh_release(neigh);
3027 return err;
3028 }
3030 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3031 {
3032 int chain;
3033 struct neigh_hash_table *nht;
3035 rcu_read_lock_bh();
3036 nht = rcu_dereference_bh(tbl->nht);
3038 read_lock(&tbl->lock); /* avoid resizes */
3039 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3040 struct neighbour *n;
3042 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
3043 n != NULL;
3044 n = rcu_dereference_bh(n->next))
3045 cb(n, cookie);
3046 }
3047 read_unlock(&tbl->lock);
3048 rcu_read_unlock_bh();
3049 }
3050 EXPORT_SYMBOL(neigh_for_each);
3052 /* The tbl->lock must be held as a writer and BH disabled. */
3053 void __neigh_for_each_release(struct neigh_table *tbl,
3054 int (*cb)(struct neighbour *))
3055 {
3056 int chain;
3057 struct neigh_hash_table *nht;
3059 nht = rcu_dereference_protected(tbl->nht,
3060 lockdep_is_held(&tbl->lock));
3061 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3062 struct neighbour *n;
3063 struct neighbour __rcu **np;
3065 np = &nht->hash_buckets[chain];
3066 while ((n = rcu_dereference_protected(*np,
3067 lockdep_is_held(&tbl->lock))) != NULL) {
3068 int release;
3070 write_lock(&n->lock);
3071 release = cb(n);
3072 if (release) {
3073 rcu_assign_pointer(*np,
3074 rcu_dereference_protected(n->next,
3075 lockdep_is_held(&tbl->lock)));
3076 neigh_mark_dead(n);
3077 } else
3078 np = &n->next;
3079 write_unlock(&n->lock);
3080 if (release)
3081 neigh_cleanup_and_release(n);
3082 }
3083 }
3084 }
3085 EXPORT_SYMBOL(__neigh_for_each_release);
3087 int neigh_xmit(int index, struct net_device *dev,
3088 const void *addr, struct sk_buff *skb)
3089 {
3090 int err = -EAFNOSUPPORT;
3091 if (likely(index < NEIGH_NR_TABLES)) {
3092 struct neigh_table *tbl;
3093 struct neighbour *neigh;
3095 tbl = neigh_tables[index];
3096 if (!tbl)
3097 goto out_kfree_skb;
3098 rcu_read_lock_bh();
3099 if (index == NEIGH_ARP_TABLE) {
3100 u32 key = *((u32 *)addr);
3102 neigh = __ipv4_neigh_lookup_noref(dev, key);
3103 } else {
3104 neigh = __neigh_lookup_noref(tbl, addr, dev);
3105 }
3106 if (!neigh)
3107 neigh = __neigh_create(tbl, addr, dev, false);
3108 err = PTR_ERR(neigh);
3109 if (IS_ERR(neigh)) {
3110 rcu_read_unlock_bh();
3111 goto out_kfree_skb;
3112 }
3113 err = neigh->output(neigh, skb);
3114 rcu_read_unlock_bh();
3115 }
3116 else if (index == NEIGH_LINK_TABLE) {
3117 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3118 addr, NULL, skb->len);
3119 if (err < 0)
3120 goto out_kfree_skb;
3121 err = dev_queue_xmit(skb);
3122 }
3123 out:
3124 return err;
3125 out_kfree_skb:
3126 kfree_skb(skb);
3127 goto out;
3128 }
3129 EXPORT_SYMBOL(neigh_xmit);
3131 #ifdef CONFIG_PROC_FS
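/* The seq_file iterators below walk the resolved entries first and
 * then, unless NEIGH_SEQ_NEIGH_ONLY is set, the proxy (pneigh) buckets.
 */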
3133 static struct neighbour *neigh_get_first(struct seq_file *seq)
3134 {
3135 struct neigh_seq_state *state = seq->private;
3136 struct net *net = seq_file_net(seq);
3137 struct neigh_hash_table *nht = state->nht;
3138 struct neighbour *n = NULL;
3139 int bucket;
3141 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3142 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3143 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3145 while (n) {
3146 if (!net_eq(dev_net(n->dev), net))
3147 goto next;
3148 if (state->neigh_sub_iter) {
3149 loff_t fakep = 0;
3150 void *v;
3152 v = state->neigh_sub_iter(state, n, &fakep);
3153 if (!v)
3154 goto next;
3155 }
3156 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3157 break;
3158 if (n->nud_state & ~NUD_NOARP)
3159 break;
3160 next:
3161 n = rcu_dereference_bh(n->next);
3162 }
3164 if (n)
3165 break;
3166 }
3167 state->bucket = bucket;
3169 return n;
3170 }
3172 static struct neighbour *neigh_get_next(struct seq_file *seq,
3173 struct neighbour *n,
3174 loff_t *pos)
3175 {
3176 struct neigh_seq_state *state = seq->private;
3177 struct net *net = seq_file_net(seq);
3178 struct neigh_hash_table *nht = state->nht;
3180 if (state->neigh_sub_iter) {
3181 void *v = state->neigh_sub_iter(state, n, pos);
3182 if (v)
3183 return n;
3184 }
3185 n = rcu_dereference_bh(n->next);
3187 while (1) {
3188 while (n) {
3189 if (!net_eq(dev_net(n->dev), net))
3190 goto next;
3191 if (state->neigh_sub_iter) {
3192 void *v = state->neigh_sub_iter(state, n, pos);
3193 if (v)
3194 return n;
3195 goto next;
3196 }
3197 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3198 break;
3200 if (n->nud_state & ~NUD_NOARP)
3201 break;
3202 next:
3203 n = rcu_dereference_bh(n->next);
3204 }
3206 if (n)
3207 break;
3209 if (++state->bucket >= (1 << nht->hash_shift))
3210 break;
3212 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
3213 }
3215 if (n && pos)
3216 --(*pos);
3218 return n;
3219 }
3220 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3221 {
3222 struct neighbour *n = neigh_get_first(seq);
3224 if (n) {
3225 --(*pos);
3226 while (*pos) {
3227 n = neigh_get_next(seq, n, pos);
3228 if (!n)
3229 break;
3230 }
3231 }
3232 return *pos ? NULL : n;
3233 }
3235 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3236 {
3237 struct neigh_seq_state *state = seq->private;
3238 struct net *net = seq_file_net(seq);
3239 struct neigh_table *tbl = state->tbl;
3240 struct pneigh_entry *pn = NULL;
3241 int bucket;
3243 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3244 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3245 pn = tbl->phash_buckets[bucket];
3246 while (pn && !net_eq(pneigh_net(pn), net))
3247 pn = pn->next;
3248 if (pn)
3249 break;
3250 }
3251 state->bucket = bucket;
3253 return pn;
3254 }
3256 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3257 struct pneigh_entry *pn,
3258 loff_t *pos)
3259 {
3260 struct neigh_seq_state *state = seq->private;
3261 struct net *net = seq_file_net(seq);
3262 struct neigh_table *tbl = state->tbl;
3264 do {
3265 pn = pn->next;
3266 } while (pn && !net_eq(pneigh_net(pn), net));
3268 while (!pn) {
3269 if (++state->bucket > PNEIGH_HASHMASK)
3270 break;
3271 pn = tbl->phash_buckets[state->bucket];
3272 while (pn && !net_eq(pneigh_net(pn), net))
3273 pn = pn->next;
3274 if (pn)
3275 break;
3276 }
3278 if (pn && pos)
3279 --(*pos);
3281 return pn;
3282 }
3284 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3285 {
3286 struct pneigh_entry *pn = pneigh_get_first(seq);
3288 if (pn) {
3289 --(*pos);
3290 while (*pos) {
3291 pn = pneigh_get_next(seq, pn, pos);
3292 if (!pn)
3293 break;
3294 }
3295 }
3296 return *pos ? NULL : pn;
3297 }
3299 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3300 {
3301 struct neigh_seq_state *state = seq->private;
3302 void *rc;
3303 loff_t idxpos = *pos;
3305 rc = neigh_get_idx(seq, &idxpos);
3306 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3307 rc = pneigh_get_idx(seq, &idxpos);
3309 return rc;
3310 }
3312 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3313 __acquires(tbl->lock)
3314 __acquires(rcu_bh)
3315 {
3316 struct neigh_seq_state *state = seq->private;
3318 state->tbl = tbl;
3319 state->bucket = 0;
3320 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3322 rcu_read_lock_bh();
3323 state->nht = rcu_dereference_bh(tbl->nht);
3324 read_lock(&tbl->lock);
3326 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3327 }
3328 EXPORT_SYMBOL(neigh_seq_start);
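/* neigh_seq_start() and neigh_seq_stop() bracket every walk: the
 * RCU-BH read side and tbl->lock taken above are dropped again in
 * neigh_seq_stop(), so callers must always pair the two.
 */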
3330 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3331 {
3332 struct neigh_seq_state *state;
3333 void *rc;
3335 if (v == SEQ_START_TOKEN) {
3336 rc = neigh_get_first(seq);
3337 goto out;
3338 }
3340 state = seq->private;
3341 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3342 rc = neigh_get_next(seq, v, NULL);
3343 if (rc)
3344 goto out;
3345 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3346 rc = pneigh_get_first(seq);
3347 } else {
3348 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3349 rc = pneigh_get_next(seq, v, NULL);
3350 }
3351 out:
3352 ++(*pos);
3353 return rc;
3354 }
3355 EXPORT_SYMBOL(neigh_seq_next);
3357 void neigh_seq_stop(struct seq_file *seq, void *v)
3358 __releases(tbl->lock)
3359 __releases(rcu_bh)
3360 {
3361 struct neigh_seq_state *state = seq->private;
3362 struct neigh_table *tbl = state->tbl;
3364 read_unlock(&tbl->lock);
3365 rcu_read_unlock_bh();
3366 }
3367 EXPORT_SYMBOL(neigh_seq_stop);
3369 /* statistics via seq_file */
3371 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3372 {
3373 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3374 int cpu;
3376 if (*pos == 0)
3377 return SEQ_START_TOKEN;
3379 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3380 if (!cpu_possible(cpu))
3381 continue;
3382 *pos = cpu+1;
3383 return per_cpu_ptr(tbl->stats, cpu);
3384 }
3385 return NULL;
3386 }
3388 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3389 {
3390 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3391 int cpu;
3393 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3394 if (!cpu_possible(cpu))
3395 continue;
3396 *pos = cpu+1;
3397 return per_cpu_ptr(tbl->stats, cpu);
3398 }
3399 (*pos)++;
3400 return NULL;
3401 }
3403 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3404 {
3405 }
3408 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3409 {
3410 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3411 struct neigh_statistics *st = v;
3413 if (v == SEQ_START_TOKEN) {
3414 seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3415 return 0;
3416 }
3418 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3419 "%08lx %08lx %08lx "
3420 "%08lx %08lx %08lx\n",
3421 atomic_read(&tbl->entries),
3423 st->allocs,
3424 st->destroys,
3425 st->hash_grows,
3427 st->lookups,
3428 st->hits,
3430 st->res_failed,
3432 st->rcv_probes_mcast,
3433 st->rcv_probes_ucast,
3435 st->periodic_gc_runs,
3436 st->forced_gc_runs,
3437 st->unres_discards,
3438 st->table_fulls
3439 );
3441 return 0;
3442 }
3444 static const struct seq_operations neigh_stat_seq_ops = {
3445 .start = neigh_stat_seq_start,
3446 .next = neigh_stat_seq_next,
3447 .stop = neigh_stat_seq_stop,
3448 .show = neigh_stat_seq_show,
3449 };
3450 #endif /* CONFIG_PROC_FS */
3452 static void __neigh_notify(struct neighbour *n, int type, int flags,
3453 u32 pid)
3454 {
3455 struct net *net = dev_net(n->dev);
3456 struct sk_buff *skb;
3457 int err = -ENOBUFS;
3459 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3460 if (skb == NULL)
3461 goto errout;
3463 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3464 if (err < 0) {
3465 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3466 WARN_ON(err == -EMSGSIZE);
3467 kfree_skb(skb);
3468 goto errout;
3469 }
3470 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3471 return;
3473 errout:
3474 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3475 }
3477 void neigh_app_ns(struct neighbour *n)
3478 {
3479 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3480 }
3481 EXPORT_SYMBOL(neigh_app_ns);
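/* neigh_app_ns() multicasts an RTM_GETNEIGH request so that a
 * userspace resolver (e.g. arpd) can take part in resolution when
 * app_solicit/app_probes is configured.
 */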
3483 #ifdef CONFIG_SYSCTL
3484 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
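/* The legacy unres_qlen sysctl counts packets while the internal limit
 * is in bytes; proc_unres_qlen() below converts using the true size of
 * a full-sized ethernet frame, and unres_qlen_max caps the packet
 * count so the byte value cannot overflow an int.
 */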
3486 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3487 void *buffer, size_t *lenp, loff_t *ppos)
3488 {
3489 int size, ret;
3490 struct ctl_table tmp = *ctl;
3492 tmp.extra1 = SYSCTL_ZERO;
3493 tmp.extra2 = &unres_qlen_max;
3494 tmp.data = &size;
3496 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3497 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3499 if (write && !ret)
3500 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3501 return ret;
3502 }
3504 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3505 int family)
3506 {
3507 switch (family) {
3508 case AF_INET:
3509 return __in_dev_arp_parms_get_rcu(dev);
3510 case AF_INET6:
3511 return __in6_dev_nd_parms_get_rcu(dev);
3512 }
3513 return NULL;
3514 }
3516 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3517 int index)
3518 {
3519 struct net_device *dev;
3520 int family = neigh_parms_family(p);
3522 rcu_read_lock();
3523 for_each_netdev_rcu(net, dev) {
3524 struct neigh_parms *dst_p =
3525 neigh_get_dev_parms_rcu(dev, family);
3527 if (dst_p && !test_bit(index, dst_p->data_state))
3528 dst_p->data[index] = p->data[index];
3529 }
3530 rcu_read_unlock();
3531 }
3533 static void neigh_proc_update(struct ctl_table *ctl, int write)
3534 {
3535 struct net_device *dev = ctl->extra1;
3536 struct neigh_parms *p = ctl->extra2;
3537 struct net *net = neigh_parms_net(p);
3538 int index = (int *) ctl->data - p->data;
3540 if (!write)
3541 return;
3543 set_bit(index, p->data_state);
3544 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3545 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3546 if (!dev) /* NULL dev means this is default value */
3547 neigh_copy_dflt_parms(net, p, index);
3548 }
3550 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3551 void *buffer, size_t *lenp,
3552 loff_t *ppos)
3553 {
3554 struct ctl_table tmp = *ctl;
3555 int ret;
3557 tmp.extra1 = SYSCTL_ZERO;
3558 tmp.extra2 = SYSCTL_INT_MAX;
3560 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3561 neigh_proc_update(ctl, write);
3562 return ret;
3563 }
3565 int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
3566 size_t *lenp, loff_t *ppos)
3567 {
3568 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3570 neigh_proc_update(ctl, write);
3571 return ret;
3572 }
3573 EXPORT_SYMBOL(neigh_proc_dointvec);
3575 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
3576 size_t *lenp, loff_t *ppos)
3577 {
3578 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3580 neigh_proc_update(ctl, write);
3581 return ret;
3582 }
3583 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3585 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3586 void *buffer, size_t *lenp,
3587 loff_t *ppos)
3588 {
3589 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3591 neigh_proc_update(ctl, write);
3592 return ret;
3593 }
3595 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3596 void *buffer, size_t *lenp, loff_t *ppos)
3597 {
3598 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3600 neigh_proc_update(ctl, write);
3601 return ret;
3602 }
3603 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3605 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3606 void *buffer, size_t *lenp,
3607 loff_t *ppos)
3608 {
3609 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3611 neigh_proc_update(ctl, write);
3612 return ret;
3613 }
3615 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3616 void *buffer, size_t *lenp,
3617 loff_t *ppos)
3618 {
3619 struct neigh_parms *p = ctl->extra2;
3620 int ret;
3622 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3623 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3624 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3625 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3626 else
3627 ret = -1;
3629 if (write && ret == 0) {
3630 /* update reachable_time as well, otherwise, the change will
3631 * only be effective after the next time neigh_periodic_work
3632 * decides to recompute it
3633 */
3634 p->reachable_time =
3635 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3636 }
3637 return ret;
3638 }
3640 #define NEIGH_PARMS_DATA_OFFSET(index) \
3641 (&((struct neigh_parms *) 0)->data[index])
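/* NEIGH_PARMS_DATA_OFFSET() yields the field's offset disguised as a
 * pointer; neigh_sysctl_register() later adds the real neigh_parms
 * address ("data += (long) p") to turn it into a usable pointer.
 */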
3643 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3644 [NEIGH_VAR_ ## attr] = { \
3645 .procname = name, \
3646 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3647 .maxlen = sizeof(int), \
3648 .mode = mval, \
3649 .proc_handler = proc, \
3650 }
3652 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3653 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3655 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3656 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3658 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3659 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3661 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3662 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3664 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3665 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3667 static struct neigh_sysctl_table {
3668 struct ctl_table_header *sysctl_header;
3669 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3670 } neigh_sysctl_template __read_mostly = {
3671 .neigh_vars = {
3672 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3673 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3674 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3675 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3676 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3677 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3678 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3679 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3680 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3681 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3682 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3683 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3684 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3685 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3686 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3687 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
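/* The REUSED entries above alias the data slot of another variable:
 * the legacy names expose the same value in different units.
 */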
3688 [NEIGH_VAR_GC_INTERVAL] = {
3689 .procname = "gc_interval",
3690 .maxlen = sizeof(int),
3691 .mode = 0644,
3692 .proc_handler = proc_dointvec_jiffies,
3693 },
3694 [NEIGH_VAR_GC_THRESH1] = {
3695 .procname = "gc_thresh1",
3696 .maxlen = sizeof(int),
3697 .mode = 0644,
3698 .extra1 = SYSCTL_ZERO,
3699 .extra2 = SYSCTL_INT_MAX,
3700 .proc_handler = proc_dointvec_minmax,
3701 },
3702 [NEIGH_VAR_GC_THRESH2] = {
3703 .procname = "gc_thresh2",
3704 .maxlen = sizeof(int),
3705 .mode = 0644,
3706 .extra1 = SYSCTL_ZERO,
3707 .extra2 = SYSCTL_INT_MAX,
3708 .proc_handler = proc_dointvec_minmax,
3709 },
3710 [NEIGH_VAR_GC_THRESH3] = {
3711 .procname = "gc_thresh3",
3712 .maxlen = sizeof(int),
3713 .mode = 0644,
3714 .extra1 = SYSCTL_ZERO,
3715 .extra2 = SYSCTL_INT_MAX,
3716 .proc_handler = proc_dointvec_minmax,
3717 },
3718 {},
3719 },
3720 };
3722 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3723 proc_handler *handler)
3724 {
3725 int i;
3726 struct neigh_sysctl_table *t;
3727 const char *dev_name_source;
3728 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3729 char *p_name;
3731 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3732 if (!t)
3733 goto err;
3735 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3736 t->neigh_vars[i].data += (long) p;
3737 t->neigh_vars[i].extra1 = dev;
3738 t->neigh_vars[i].extra2 = p;
3739 }
3741 if (dev) {
3742 dev_name_source = dev->name;
3743 /* Terminate the table early */
3744 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3745 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3746 } else {
3747 struct neigh_table *tbl = p->tbl;
3748 dev_name_source = "default";
3749 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3750 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3751 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3752 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3753 }
3755 if (handler) {
3756 /* RetransTime */
3757 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3758 /* ReachableTime */
3759 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3760 /* RetransTime (in milliseconds)*/
3761 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3762 /* ReachableTime (in milliseconds) */
3763 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3764 } else {
3765 /* Those handlers will update p->reachable_time after
3766 * base_reachable_time(_ms) is set to ensure the new timer starts being
3767 * applied after the next neighbour update instead of waiting for
3768 * neigh_periodic_work to update its value (can be multiple minutes)
3769 * So any handler that replaces them should do this as well
3770 */
3771 /* ReachableTime */
3772 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3773 neigh_proc_base_reachable_time;
3774 /* ReachableTime (in milliseconds) */
3775 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3776 neigh_proc_base_reachable_time;
3777 }
3779 switch (neigh_parms_family(p)) {
3780 case AF_INET:
3781 p_name = "ipv4";
3782 break;
3783 case AF_INET6:
3784 p_name = "ipv6";
3785 break;
3786 default:
3787 BUG();
3788 }
3790 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3791 p_name, dev_name_source);
3792 t->sysctl_header =
3793 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3794 if (!t->sysctl_header)
3795 goto free;
3797 p->sysctl_table = t;
3798 return 0;
3800 free:
3801 kfree(t);
3802 err:
3803 return -ENOBUFS;
3804 }
3805 EXPORT_SYMBOL(neigh_sysctl_register);
3807 void neigh_sysctl_unregister(struct neigh_parms *p)
3808 {
3809 if (p->sysctl_table) {
3810 struct neigh_sysctl_table *t = p->sysctl_table;
3811 p->sysctl_table = NULL;
3812 unregister_net_sysctl_table(t->sysctl_header);
3813 kfree(t);
3814 }
3815 }
3816 EXPORT_SYMBOL(neigh_sysctl_unregister);
3818 #endif /* CONFIG_SYSCTL */
3820 static int __init neigh_init(void)
3821 {
3822 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3823 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3824 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3826 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3827 0);
3828 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3830 return 0;
3831 }
3833 subsys_initcall(neigh_init);