1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Generic address resolution entity
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
10 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
11 * Harald Welte Add neighbour cache statistics like rtstat
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 #include <linux/slab.h>
17 #include <linux/kmemleak.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
25 #include <linux/sysctl.h>
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
42 #include <trace/events/neigh.h>
45 #define neigh_dbg(level, fmt, ...) \
47 if (level <= NEIGH_DEBUG) \
48 pr_debug(fmt, ##__VA_ARGS__); \
51 #define PNEIGH_HASHMASK 0xF
53 static void neigh_timer_handler(struct timer_list *t);
54 static void __neigh_notify(struct neighbour *n, int type, int flags,
56 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
57 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
58 struct net_device *dev);
61 static const struct seq_operations neigh_stat_seq_ops;
65 Neighbour hash table buckets are protected with rwlock tbl->lock.
67 - All the scans/updates to hash buckets MUST be made under this lock.
68 - NOTHING clever should be made under this lock: no callbacks
69 to protocol backends, no attempts to send something to network.
70 It will result in deadlocks, if backend/driver wants to use neighbour
72 - If the entry requires some non-trivial actions, increase
73 its reference count and release table lock.
75 Neighbour entries are protected:
76 - with reference count.
77 - with rwlock neigh->lock
79 Reference count prevents destruction.
81 neigh->lock mainly serializes ll address data and its validity state.
82 However, the same lock is used to protect other entry fields:
86 Again, nothing clever shall be made under neigh->lock,
87 the most complicated procedure, which we allow is dev->hard_header.
88 It is supposed, that dev->hard_header is simplistic and does
89 not make callbacks to neighbour tables.
/* Sink output handler installed on dead/stray entries (see neigh_alloc and
 * neigh_flush_dev, which assign it to n->output).  NOTE(review): body elided
 * in this listing; presumably it frees the skb and returns an error — confirm
 * against the full source.
 */
92 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
/* Announce removal of @neigh — netlink RTM_DELNEIGH plus the
 * NETEVENT_NEIGH_UPDATE notifier chain — then drop the caller's reference.
 */
98 static void neigh_cleanup_and_release(struct neighbour *neigh)
100 trace_neigh_cleanup_and_release(neigh, 0);
101 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
102 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
103 neigh_release(neigh);
107 * It is random distribution in the interval (1/2)*base...(3/2)*base.
108 * It corresponds to default IPv6 settings and is not overridable,
109 because it is a really reasonable choice.
/* Jitter helper: returns a value uniform in [base/2, 3*base/2), or 0 when
 * @base is 0 (per the comment above this function).
 */
112 unsigned long neigh_rand_reach_time(unsigned long base)
114 return base ? (prandom_u32() % base) + (base >> 1) : 0;
116 EXPORT_SYMBOL(neigh_rand_reach_time);
/* Detach a dying entry from the per-table gc and managed lists.
 * tbl->gc_entries mirrors gc_list membership, so it is decremented together
 * with the list removal.
 */
118 static void neigh_mark_dead(struct neighbour *n)
121 if (!list_empty(&n->gc_list)) {
122 list_del_init(&n->gc_list);
123 atomic_dec(&n->tbl->gc_entries);
125 if (!list_empty(&n->managed_list))
126 list_del_init(&n->managed_list);
/* Reconcile gc_list membership with the entry's current state: NUD_PERMANENT
 * or externally-learned (NTF_EXT_LEARNED) entries are exempt from forced gc;
 * every other entry belongs on tbl->gc_list.  Lock order is tbl->lock then
 * n->lock, matching the other list updaters in this file.
 */
129 static void neigh_update_gc_list(struct neighbour *n)
131 bool on_gc_list, exempt_from_gc;
133 write_lock_bh(&n->tbl->lock);
134 write_lock(&n->lock);
138 /* remove from the gc list if new state is permanent or if neighbor
139 * is externally learned; otherwise entry should be on the gc list
141 exempt_from_gc = n->nud_state & NUD_PERMANENT ||
142 n->flags & NTF_EXT_LEARNED;
143 on_gc_list = !list_empty(&n->gc_list);
145 if (exempt_from_gc && on_gc_list) {
146 list_del_init(&n->gc_list);
147 atomic_dec(&n->tbl->gc_entries);
148 } else if (!exempt_from_gc && !on_gc_list) {
149 /* add entries to the tail; cleaning removes from the front */
150 list_add_tail(&n->gc_list, &n->tbl->gc_list);
151 atomic_inc(&n->tbl->gc_entries);
154 write_unlock(&n->lock);
155 write_unlock_bh(&n->tbl->lock);
/* Keep tbl->managed_list in sync with the entry's NTF_MANAGED flag, under
 * tbl->lock + n->lock (same lock order as neigh_update_gc_list).
 */
158 static void neigh_update_managed_list(struct neighbour *n)
160 bool on_managed_list, add_to_managed;
162 write_lock_bh(&n->tbl->lock);
163 write_lock(&n->lock);
167 add_to_managed = n->flags & NTF_MANAGED;
168 on_managed_list = !list_empty(&n->managed_list);
170 if (!add_to_managed && on_managed_list)
171 list_del_init(&n->managed_list);
172 else if (add_to_managed && !on_managed_list)
173 list_add_tail(&n->managed_list, &n->tbl->managed_list);
175 write_unlock(&n->lock);
176 write_unlock_bh(&n->tbl->lock);
/* Apply admin-requested flag changes, translating NEIGH_UPDATE_F_EXT_LEARNED /
 * NEIGH_UPDATE_F_MANAGED into the corresponding NTF_* bits on neigh->flags.
 * Non-admin callers are ignored (NOTE(review): the early return after the
 * ADMIN check is elided in this listing — confirm).  *managed_update is set
 * when the MANAGED bit flips; presumably *notify and *gc_update are set in
 * the elided lines of the EXT_LEARNED branch — confirm against full source.
 */
179 static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
180 bool *gc_update, bool *managed_update)
182 u32 ndm_flags, old_flags = neigh->flags;
184 if (!(flags & NEIGH_UPDATE_F_ADMIN))
187 ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
188 ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
190 if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
191 if (ndm_flags & NTF_EXT_LEARNED)
192 neigh->flags |= NTF_EXT_LEARNED;
194 neigh->flags &= ~NTF_EXT_LEARNED;
198 if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
199 if (ndm_flags & NTF_MANAGED)
200 neigh->flags |= NTF_MANAGED;
202 neigh->flags &= ~NTF_MANAGED;
204 *managed_update = true;
/* Unlink *n from its hash chain (via the bucket slot @np) when nobody else
 * holds a reference.  Caller holds tbl->lock (see the lockdep assertions).
 * On success the entry is marked dead and handed to
 * neigh_cleanup_and_release().
 */
208 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
209 struct neigh_table *tbl)
213 write_lock(&n->lock);
214 if (refcount_read(&n->refcnt) == 1) {
215 struct neighbour *neigh;
217 neigh = rcu_dereference_protected(n->next,
218 lockdep_is_held(&tbl->lock));
219 rcu_assign_pointer(*np, neigh);
223 write_unlock(&n->lock);
225 neigh_cleanup_and_release(n);
/* Locate @ndel's bucket by rehashing its primary key, walk the chain, and
 * remove it via neigh_del().  Caller holds tbl->lock (lockdep-asserted).
 */
229 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
231 struct neigh_hash_table *nht;
232 void *pkey = ndel->primary_key;
235 struct neighbour __rcu **np;
237 nht = rcu_dereference_protected(tbl->nht,
238 lockdep_is_held(&tbl->lock));
239 hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
240 hash_val = hash_val >> (32 - nht->hash_shift);
242 np = &nht->hash_buckets[hash_val];
243 while ((n = rcu_dereference_protected(*np,
244 lockdep_is_held(&tbl->lock)))) {
246 return neigh_del(n, np, tbl);
/* Synchronous shrink of the gc list down toward gc_thresh2.  Only unreferenced
 * entries qualify, and only if FAILED, NOARP, multicast-keyed, or untouched
 * for at least 5 seconds (tref).  Runs under tbl->lock; invoked from
 * neigh_alloc() when the table is over threshold.
 */
252 static int neigh_forced_gc(struct neigh_table *tbl)
254 int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
255 unsigned long tref = jiffies - 5 * HZ;
256 struct neighbour *n, *tmp;
259 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
261 write_lock_bh(&tbl->lock);
263 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
264 if (refcount_read(&n->refcnt) == 1) {
267 write_lock(&n->lock);
268 if ((n->nud_state == NUD_FAILED) ||
269 (n->nud_state == NUD_NOARP) ||
270 (tbl->is_multicast &&
271 tbl->is_multicast(n->primary_key)) ||
272 time_after(tref, n->updated))
274 write_unlock(&n->lock);
276 if (remove && neigh_remove_one(n, tbl))
278 if (shrunk >= max_clean)
283 tbl->last_flush = jiffies;
285 write_unlock_bh(&tbl->lock);
/* Arm the entry's state-machine timer.  mod_timer() returning nonzero means
 * the timer was already pending — a double add, hence the BUG printk.
 */
290 static void neigh_add_timer(struct neighbour *n, unsigned long when)
293 if (unlikely(mod_timer(&n->timer, when))) {
294 printk("NEIGH: BUG, double timer add, state is %x\n",
/* Cancel a pending state-machine timer; only attempts deletion when the
 * NUD state says a timer is armed (NUD_IN_TIMER).
 */
300 static int neigh_del_timer(struct neighbour *n)
302 if ((n->nud_state & NUD_IN_TIMER) &&
303 del_timer(&n->timer)) {
/* Drop queued proxy skbs belonging to @net (or all skbs when net == NULL),
 * under the queue spinlock.  The per-device arp_parms->qlen accounting is
 * decremented alongside each unlinked skb.
 */
310 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
315 spin_lock_irqsave(&list->lock, flags);
316 skb = skb_peek(list);
317 while (skb != NULL) {
318 struct sk_buff *skb_next = skb_peek_next(skb, list);
319 struct net_device *dev = skb->dev;
320 if (net == NULL || net_eq(dev_net(dev), net)) {
321 struct in_device *in_dev;
324 in_dev = __in_dev_get_rcu(dev);
326 in_dev->arp_parms->qlen--;
328 __skb_unlink(skb, list);
335 spin_unlock_irqrestore(&list->lock, flags);
/* Evict all entries for @dev (or every device when dev == NULL) from the hash
 * table.  Caller holds tbl->lock.  Entries still referenced by someone else
 * cannot be freed here, so they are neutered instead: queue purged, output
 * redirected to neigh_blackhole, NUD state downgraded — the final free happens
 * when the last reference is dropped.
 */
338 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
342 struct neigh_hash_table *nht;
344 nht = rcu_dereference_protected(tbl->nht,
345 lockdep_is_held(&tbl->lock));
347 for (i = 0; i < (1 << nht->hash_shift); i++) {
349 struct neighbour __rcu **np = &nht->hash_buckets[i];
351 while ((n = rcu_dereference_protected(*np,
352 lockdep_is_held(&tbl->lock))) != NULL) {
353 if (dev && n->dev != dev) {
357 if (skip_perm && n->nud_state & NUD_PERMANENT) {
361 rcu_assign_pointer(*np,
362 rcu_dereference_protected(n->next,
363 lockdep_is_held(&tbl->lock)));
364 write_lock(&n->lock);
367 if (refcount_read(&n->refcnt) != 1) {
368 /* The most unpleasant situation.
369 We must destroy neighbour entry,
370 but someone still uses it.
372 The destroy will be delayed until
373 the last user releases us, but
374 we must kill timers etc. and move
377 __skb_queue_purge(&n->arp_queue);
378 n->arp_queue_len_bytes = 0;
379 n->output = neigh_blackhole;
380 if (n->nud_state & NUD_VALID)
381 n->nud_state = NUD_NOARP;
383 n->nud_state = NUD_NONE;
384 neigh_dbg(2, "neigh %p is stray\n", n);
386 write_unlock(&n->lock);
387 neigh_cleanup_and_release(n);
/* Device address changed: flush every entry for @dev, including permanent
 * ones (skip_perm == false), under tbl->lock.
 */
392 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
394 write_lock_bh(&tbl->lock);
395 neigh_flush_dev(tbl, dev, false);
396 write_unlock_bh(&tbl->lock);
398 EXPORT_SYMBOL(neigh_changeaddr);
/* Common teardown for carrier-down / ifdown: flush hash entries, then proxy
 * entries and queued proxy skbs for @dev.  Note pneigh_ifdown_and_unlock()
 * releases tbl->lock — there is intentionally no unlock here.
 */
400 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
403 write_lock_bh(&tbl->lock);
404 neigh_flush_dev(tbl, dev, skip_perm);
405 pneigh_ifdown_and_unlock(tbl, dev);
406 pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev));
407 if (skb_queue_empty_lockless(&tbl->proxy_queue))
408 del_timer_sync(&tbl->proxy_timer);
/* Carrier lost: flush @dev's entries but keep NUD_PERMANENT ones
 * (skip_perm == true).
 */
412 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
414 __neigh_ifdown(tbl, dev, true);
417 EXPORT_SYMBOL(neigh_carrier_down);
/* Interface going down: flush everything for @dev, permanent entries
 * included (skip_perm == false).
 */
419 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
421 __neigh_ifdown(tbl, dev, false);
424 EXPORT_SYMBOL(neigh_ifdown);
/* Allocate and initialise a new neighbour entry for @tbl/@dev.  The
 * gc_entries counter is bumped optimistically first; if the table is over
 * gc_thresh3 (or over gc_thresh2 with the last flush more than 5s ago) a
 * forced gc is attempted, and a still-full table fails the allocation
 * (counter rolled back at the tail).  New entries start in NUD_NONE with
 * output = neigh_blackhole until a real state is established.
 */
426 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
427 struct net_device *dev,
428 u32 flags, bool exempt_from_gc)
430 struct neighbour *n = NULL;
431 unsigned long now = jiffies;
437 entries = atomic_inc_return(&tbl->gc_entries) - 1;
438 if (entries >= tbl->gc_thresh3 ||
439 (entries >= tbl->gc_thresh2 &&
440 time_after(now, tbl->last_flush + 5 * HZ))) {
441 if (!neigh_forced_gc(tbl) &&
442 entries >= tbl->gc_thresh3) {
443 net_info_ratelimited("%s: neighbor table overflow!\n",
445 NEIGH_CACHE_STAT_INC(tbl, table_fulls);
451 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
455 __skb_queue_head_init(&n->arp_queue);
456 rwlock_init(&n->lock);
457 seqlock_init(&n->ha_lock);
458 n->updated = n->used = now;
459 n->nud_state = NUD_NONE;
460 n->output = neigh_blackhole;
462 seqlock_init(&n->hh.hh_lock);
463 n->parms = neigh_parms_clone(&tbl->parms);
464 timer_setup(&n->timer, neigh_timer_handler, 0);
466 NEIGH_CACHE_STAT_INC(tbl, allocs);
468 refcount_set(&n->refcnt, 1);
470 INIT_LIST_HEAD(&n->gc_list);
471 INIT_LIST_HEAD(&n->managed_list);
473 atomic_inc(&tbl->entries);
479 atomic_dec(&tbl->gc_entries);
/* Seed one hash-random word; OR-ing with 1 guarantees it is odd and
 * therefore never zero.
 */
483 static void neigh_get_hash_rnd(u32 *x)
485 *x = get_random_u32() | 1;
/* Allocate a zeroed bucket array of 2^shift slots plus its descriptor.
 * Small arrays come from kzalloc; larger ones from whole pages, which are
 * registered with kmemleak manually since page allocations are not tracked.
 */
488 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
490 size_t size = (1 << shift) * sizeof(struct neighbour *);
491 struct neigh_hash_table *ret;
492 struct neighbour __rcu **buckets;
495 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
498 if (size <= PAGE_SIZE) {
499 buckets = kzalloc(size, GFP_ATOMIC);
501 buckets = (struct neighbour __rcu **)
502 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
504 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
510 ret->hash_buckets = buckets;
511 ret->hash_shift = shift;
512 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
513 neigh_get_hash_rnd(&ret->hash_rnd[i]);
/* RCU callback freeing an old hash table after a grow: mirrors
 * neigh_hash_alloc's split between kzalloc'd and page-backed buckets.
 */
517 static void neigh_hash_free_rcu(struct rcu_head *head)
519 struct neigh_hash_table *nht = container_of(head,
520 struct neigh_hash_table,
522 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
523 struct neighbour __rcu **buckets = nht->hash_buckets;
525 if (size <= PAGE_SIZE) {
528 kmemleak_free(buckets);
529 free_pages((unsigned long)buckets, get_order(size));
/* Grow the hash table to 2^new_shift buckets, rehashing every entry into the
 * new table under tbl->lock.  Readers see either the old or the new table via
 * RCU; the old one is freed after a grace period by neigh_hash_free_rcu().
 */
534 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
535 unsigned long new_shift)
537 unsigned int i, hash;
538 struct neigh_hash_table *new_nht, *old_nht;
540 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
542 old_nht = rcu_dereference_protected(tbl->nht,
543 lockdep_is_held(&tbl->lock));
544 new_nht = neigh_hash_alloc(new_shift);
548 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
549 struct neighbour *n, *next;
551 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
552 lockdep_is_held(&tbl->lock));
555 hash = tbl->hash(n->primary_key, n->dev,
558 hash >>= (32 - new_nht->hash_shift);
559 next = rcu_dereference_protected(n->next,
560 lockdep_is_held(&tbl->lock));
562 rcu_assign_pointer(n->next,
563 rcu_dereference_protected(
564 new_nht->hash_buckets[hash],
565 lockdep_is_held(&tbl->lock)));
566 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
570 rcu_assign_pointer(tbl->nht, new_nht);
571 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
/* Reference-taking lookup by (pkey, dev).  Uses the lockless RCU lookup and
 * then refcount_inc_not_zero() so an entry concurrently going to zero is
 * treated as a miss.
 */
575 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
576 struct net_device *dev)
580 NEIGH_CACHE_STAT_INC(tbl, lookups);
583 n = __neigh_lookup_noref(tbl, pkey, dev);
585 if (!refcount_inc_not_zero(&n->refcnt))
587 NEIGH_CACHE_STAT_INC(tbl, hits);
590 rcu_read_unlock_bh();
593 EXPORT_SYMBOL(neigh_lookup);
/* Device-agnostic lookup: hash with dev == NULL and match on key bytes plus
 * the entry's network namespace.  Same refcount_inc_not_zero() miss semantics
 * as neigh_lookup().
 */
595 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
599 unsigned int key_len = tbl->key_len;
601 struct neigh_hash_table *nht;
603 NEIGH_CACHE_STAT_INC(tbl, lookups);
606 nht = rcu_dereference_bh(tbl->nht);
607 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
609 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
611 n = rcu_dereference_bh(n->next)) {
612 if (!memcmp(n->primary_key, pkey, key_len) &&
613 net_eq(dev_net(n->dev), net)) {
614 if (!refcount_inc_not_zero(&n->refcnt))
616 NEIGH_CACHE_STAT_INC(tbl, hits);
621 rcu_read_unlock_bh();
/* Create and insert a new entry for (pkey, dev).  Sequence: allocate, run
 * protocol / driver / parms constructors, then under tbl->lock grow the hash
 * if over-full, reject if parms are dead, detect a racing insert of the same
 * key (the duplicate-scan loop below), link onto gc/managed lists as the
 * flags dictate, and finally publish at the bucket head via RCU.
 * NOTE(review): several error/duplicate paths are elided in this listing.
 */
626 static struct neighbour *
627 ___neigh_create(struct neigh_table *tbl, const void *pkey,
628 struct net_device *dev, u32 flags,
629 bool exempt_from_gc, bool want_ref)
631 u32 hash_val, key_len = tbl->key_len;
632 struct neighbour *n1, *rc, *n;
633 struct neigh_hash_table *nht;
636 n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
637 trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
639 rc = ERR_PTR(-ENOBUFS);
643 memcpy(n->primary_key, pkey, key_len);
645 netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
647 /* Protocol specific setup. */
648 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
650 goto out_neigh_release;
653 if (dev->netdev_ops->ndo_neigh_construct) {
654 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
657 goto out_neigh_release;
661 /* Device specific setup. */
662 if (n->parms->neigh_setup &&
663 (error = n->parms->neigh_setup(n)) < 0) {
665 goto out_neigh_release;
668 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
670 write_lock_bh(&tbl->lock);
671 nht = rcu_dereference_protected(tbl->nht,
672 lockdep_is_held(&tbl->lock));
674 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
675 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
677 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
679 if (n->parms->dead) {
680 rc = ERR_PTR(-EINVAL);
684 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
685 lockdep_is_held(&tbl->lock));
687 n1 = rcu_dereference_protected(n1->next,
688 lockdep_is_held(&tbl->lock))) {
689 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
699 list_add_tail(&n->gc_list, &n->tbl->gc_list);
700 if (n->flags & NTF_MANAGED)
701 list_add_tail(&n->managed_list, &n->tbl->managed_list);
704 rcu_assign_pointer(n->next,
705 rcu_dereference_protected(nht->hash_buckets[hash_val],
706 lockdep_is_held(&tbl->lock)));
707 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
708 write_unlock_bh(&tbl->lock);
709 neigh_dbg(2, "neigh %p is created\n", n);
714 write_unlock_bh(&tbl->lock);
717 atomic_dec(&tbl->gc_entries);
/* Public wrapper: create with no extra flags and normal gc eligibility. */
722 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
723 struct net_device *dev, bool want_ref)
725 return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
727 EXPORT_SYMBOL(__neigh_create);
/* Hash a proxy key: fold the last 4 key bytes down to the 4-bit bucket index
 * (PNEIGH_HASHMASK == 0xF).  Assumes key_len >= 4 for the unaligned read —
 * TODO(review) confirm all callers guarantee that.
 */
729 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
731 u32 hash_val = *(u32 *)(pkey + key_len - 4);
732 hash_val ^= (hash_val >> 16);
733 hash_val ^= hash_val >> 8;
734 hash_val ^= hash_val >> 4;
735 hash_val &= PNEIGH_HASHMASK;
/* Walk one proxy-hash chain matching key bytes and namespace; an entry with
 * n->dev == NULL acts as a wildcard matching any device.
 */
739 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
742 unsigned int key_len,
743 struct net_device *dev)
746 if (!memcmp(n->key, pkey, key_len) &&
747 net_eq(pneigh_net(n), net) &&
748 (n->dev == dev || !n->dev))
/* Lockless proxy lookup.  NOTE(review): unlike pneigh_lookup() below, this
 * takes no lock itself — presumably the caller must hold tbl->lock; confirm.
 */
755 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
756 struct net *net, const void *pkey, struct net_device *dev)
758 unsigned int key_len = tbl->key_len;
759 u32 hash_val = pneigh_hash(pkey, key_len);
761 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
762 net, pkey, key_len, dev);
764 EXPORT_SYMBOL_GPL(__pneigh_lookup);
/* Look up a proxy entry, optionally creating it (@creat).  The lookup runs
 * under a read lock; creation uses GFP_KERNEL allocations outside any lock
 * and links the new entry at the bucket head under the write lock.  The
 * pconstructor failure path (netdev_put then free) is partially elided here.
 */
766 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
767 struct net *net, const void *pkey,
768 struct net_device *dev, int creat)
770 struct pneigh_entry *n;
771 unsigned int key_len = tbl->key_len;
772 u32 hash_val = pneigh_hash(pkey, key_len);
774 read_lock_bh(&tbl->lock);
775 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
776 net, pkey, key_len, dev);
777 read_unlock_bh(&tbl->lock);
784 n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
788 write_pnet(&n->net, net);
789 memcpy(n->key, pkey, key_len);
791 netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
793 if (tbl->pconstructor && tbl->pconstructor(n)) {
794 netdev_put(dev, &n->dev_tracker);
800 write_lock_bh(&tbl->lock);
801 n->next = tbl->phash_buckets[hash_val];
802 tbl->phash_buckets[hash_val] = n;
803 write_unlock_bh(&tbl->lock);
/* Remove one proxy entry matching (pkey, dev, net) exactly — no wildcard
 * here, unlike __pneigh_lookup_1().  Unlink under the write lock, then run
 * destructor and release the device reference outside it.
 */
810 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
811 struct net_device *dev)
813 struct pneigh_entry *n, **np;
814 unsigned int key_len = tbl->key_len;
815 u32 hash_val = pneigh_hash(pkey, key_len);
817 write_lock_bh(&tbl->lock);
818 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
820 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
821 net_eq(pneigh_net(n), net)) {
823 write_unlock_bh(&tbl->lock);
824 if (tbl->pdestructor)
826 netdev_put(n->dev, &n->dev_tracker);
831 write_unlock_bh(&tbl->lock);
/* Collect all proxy entries for @dev (or all entries when dev == NULL) onto
 * a private freelist while tbl->lock is held, then RELEASE tbl->lock — the
 * caller (__neigh_ifdown) acquires it and relies on this unlock — and run
 * destructors / drop device references lock-free.
 */
835 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
836 struct net_device *dev)
838 struct pneigh_entry *n, **np, *freelist = NULL;
841 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
842 np = &tbl->phash_buckets[h];
843 while ((n = *np) != NULL) {
844 if (!dev || n->dev == dev) {
853 write_unlock_bh(&tbl->lock);
854 while ((n = freelist)) {
857 if (tbl->pdestructor)
859 netdev_put(n->dev, &n->dev_tracker);
865 static void neigh_parms_destroy(struct neigh_parms *parms);
/* Drop one reference on a parms block; the last put destroys it. */
867 static inline void neigh_parms_put(struct neigh_parms *parms)
869 if (refcount_dec_and_test(&parms->refcnt))
870 neigh_parms_destroy(parms);
874 * neighbour must already be out of the table;
/* Final destructor, reached when the last reference is dropped.  The entry
 * must already be unhashed and dead (hence the "alive neighbour" warning).
 * Cancels any leftover timer, purges the arp queue, gives the driver its
 * ndo_neigh_destroy callback, releases device and parms references, and
 * frees the entry after an RCU grace period.
 */
877 void neigh_destroy(struct neighbour *neigh)
879 struct net_device *dev = neigh->dev;
881 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
884 pr_warn("Destroying alive neighbour %p\n", neigh);
889 if (neigh_del_timer(neigh))
890 pr_warn("Impossible event\n");
892 write_lock_bh(&neigh->lock);
893 __skb_queue_purge(&neigh->arp_queue);
894 write_unlock_bh(&neigh->lock);
895 neigh->arp_queue_len_bytes = 0;
897 if (dev->netdev_ops->ndo_neigh_destroy)
898 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
900 netdev_put(dev, &neigh->dev_tracker);
901 neigh_parms_put(neigh->parms);
903 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
905 atomic_dec(&neigh->tbl->entries);
906 kfree_rcu(neigh, rcu);
910 /* Neighbour state is suspicious;
913 Called with write_locked neigh.
/* Switch to the slow (resolving) output path; see comment above. */
915 static void neigh_suspect(struct neighbour *neigh)
917 neigh_dbg(2, "neigh %p is suspected\n", neigh);
919 neigh->output = neigh->ops->output;
922 /* Neighbour state is OK;
925 Called with write_locked neigh.
/* Switch to the fast (connected) output path; see comment above. */
927 static void neigh_connect(struct neighbour *neigh)
929 neigh_dbg(2, "neigh %p is connected\n", neigh);
931 neigh->output = neigh->ops->connected_output;
/* Background gc work item.  Every ~300s it re-jitters reachable_time for all
 * parms blocks; then, unless the table is below gc_thresh1, it sweeps every
 * bucket and frees unreferenced entries that are FAILED or stale past
 * GC_STALETIME.  PERMANENT, in-timer, and externally-learned entries are
 * skipped.  The bucket lock is dropped and re-taken between buckets (see the
 * inline comment) so the table may be regrown meanwhile — nht is re-read.
 * Reschedules itself at BASE_REACHABLE_TIME/2.
 */
934 static void neigh_periodic_work(struct work_struct *work)
936 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
938 struct neighbour __rcu **np;
940 struct neigh_hash_table *nht;
942 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
944 write_lock_bh(&tbl->lock);
945 nht = rcu_dereference_protected(tbl->nht,
946 lockdep_is_held(&tbl->lock));
949 * periodically recompute ReachableTime from random function
952 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
953 struct neigh_parms *p;
954 tbl->last_rand = jiffies;
955 list_for_each_entry(p, &tbl->parms_list, list)
957 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
960 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
963 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
964 np = &nht->hash_buckets[i];
966 while ((n = rcu_dereference_protected(*np,
967 lockdep_is_held(&tbl->lock))) != NULL) {
970 write_lock(&n->lock);
972 state = n->nud_state;
973 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
974 (n->flags & NTF_EXT_LEARNED)) {
975 write_unlock(&n->lock);
979 if (time_before(n->used, n->confirmed))
980 n->used = n->confirmed;
982 if (refcount_read(&n->refcnt) == 1 &&
983 (state == NUD_FAILED ||
984 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
987 write_unlock(&n->lock);
988 neigh_cleanup_and_release(n);
991 write_unlock(&n->lock);
997 * It's fine to release lock here, even if hash table
998 * grows while we are preempted.
1000 write_unlock_bh(&tbl->lock);
1002 write_lock_bh(&tbl->lock);
1003 nht = rcu_dereference_protected(tbl->nht,
1004 lockdep_is_held(&tbl->lock));
1007 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
1008 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
1009 * BASE_REACHABLE_TIME.
1011 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1012 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
1013 write_unlock_bh(&tbl->lock);
/* Probe budget: unicast + app probes, plus multicast reprobes once the entry
 * is in NUD_PROBE, otherwise the initial multicast probes.
 */
1016 static __inline__ int neigh_max_probes(struct neighbour *n)
1018 struct neigh_parms *p = n->parms;
1019 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
1020 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
1021 NEIGH_VAR(p, MCAST_PROBES));
/* Resolution failed: report unreachable for each queued skb, then purge the
 * queue.  neigh->lock is dropped around each error_report() call (see the
 * __releases/__acquires annotations and the ANK comment) because the callback
 * may re-enter this same entry; the loop re-checks NUD_FAILED each iteration
 * to break out if someone revived the entry meanwhile.
 */
1024 static void neigh_invalidate(struct neighbour *neigh)
1025 __releases(neigh->lock)
1026 __acquires(neigh->lock)
1028 struct sk_buff *skb;
1030 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
1031 neigh_dbg(2, "neigh %p is failed\n", neigh);
1032 neigh->updated = jiffies;
1034 /* It is very thin place. report_unreachable is very complicated
1035 routine. Particularly, it can hit the same neighbour entry!
1037 So that, we try to be accurate and avoid dead loop. --ANK
1039 while (neigh->nud_state == NUD_FAILED &&
1040 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1041 write_unlock(&neigh->lock);
1042 neigh->ops->error_report(neigh, skb);
1043 write_lock(&neigh->lock);
1045 __skb_queue_purge(&neigh->arp_queue);
1046 neigh->arp_queue_len_bytes = 0;
/* Send one solicitation.  The newest queued skb is cloned so it survives
 * even if the arp_queue overflows while the lock is released; neigh->lock
 * is dropped before calling into the protocol's solicit op (see __releases).
 */
1049 static void neigh_probe(struct neighbour *neigh)
1050 __releases(neigh->lock)
1052 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1053 /* keep skb alive even if arp_queue overflows */
1055 skb = skb_clone(skb, GFP_ATOMIC);
1056 write_unlock(&neigh->lock);
1057 if (neigh->ops->solicit)
1058 neigh->ops->solicit(neigh, skb);
1059 atomic_inc(&neigh->probes);
1063 /* Called when a timer expires for a neighbour entry. */
/* NUD state-machine timer (RFC 4861-style transitions).  Under neigh->lock:
 * REACHABLE ages into DELAY then STALE; DELAY either returns to REACHABLE
 * (if confirmed recently) or enters PROBE; PROBE/INCOMPLETE retransmit until
 * neigh_max_probes() is exhausted, then the entry goes NUD_FAILED and queued
 * skbs get error reports via neigh_invalidate().  The timer is re-armed with
 * a floor of HZ/100, and a solicitation is sent via neigh_probe() when still
 * INCOMPLETE/PROBE.  Finishes by sending a netlink notification and dropping
 * the reference the timer held.
 */
1065 static void neigh_timer_handler(struct timer_list *t)
1067 unsigned long now, next;
1068 struct neighbour *neigh = from_timer(neigh, t, timer);
1072 write_lock(&neigh->lock);
1074 state = neigh->nud_state;
1078 if (!(state & NUD_IN_TIMER))
1081 if (state & NUD_REACHABLE) {
1082 if (time_before_eq(now,
1083 neigh->confirmed + neigh->parms->reachable_time)) {
1084 neigh_dbg(2, "neigh %p is still alive\n", neigh);
1085 next = neigh->confirmed + neigh->parms->reachable_time;
1086 } else if (time_before_eq(now,
1088 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1089 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1090 neigh->nud_state = NUD_DELAY;
1091 neigh->updated = jiffies;
1092 neigh_suspect(neigh);
1093 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1095 neigh_dbg(2, "neigh %p is suspected\n", neigh);
1096 neigh->nud_state = NUD_STALE;
1097 neigh->updated = jiffies;
1098 neigh_suspect(neigh);
1101 } else if (state & NUD_DELAY) {
1102 if (time_before_eq(now,
1104 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1105 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1106 neigh->nud_state = NUD_REACHABLE;
1107 neigh->updated = jiffies;
1108 neigh_connect(neigh);
1110 next = neigh->confirmed + neigh->parms->reachable_time;
1112 neigh_dbg(2, "neigh %p is probed\n", neigh);
1113 neigh->nud_state = NUD_PROBE;
1114 neigh->updated = jiffies;
1115 atomic_set(&neigh->probes, 0);
1117 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1121 /* NUD_PROBE|NUD_INCOMPLETE */
1122 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
1125 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1126 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1127 neigh->nud_state = NUD_FAILED;
1129 neigh_invalidate(neigh);
1133 if (neigh->nud_state & NUD_IN_TIMER) {
1134 if (time_before(next, jiffies + HZ/100))
1135 next = jiffies + HZ/100;
1136 if (!mod_timer(&neigh->timer, next))
1139 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1143 write_unlock(&neigh->lock);
1147 neigh_update_notify(neigh, 0);
1149 trace_neigh_timer_handler(neigh, 0);
1151 neigh_release(neigh);
/* Kick resolution for an entry on the transmit path, queuing @skb while the
 * address is unresolved.  CONNECTED/DELAY/PROBE states need no action.  An
 * unresolved entry either starts probing (-> NUD_INCOMPLETE, timer armed;
 * possibly an immediate probe per @immediate_ok) or, with no probes
 * configured, fails outright and drops the skb.  STALE entries move to DELAY.
 * While INCOMPLETE, skbs queue on arp_queue bounded by QUEUE_LEN_BYTES —
 * oldest packets are dropped first (unres_discards).  The tail handles a
 * dead entry: unlock, drop skb with SKB_DROP_REASON_NEIGH_DEAD.
 */
1154 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
1155 const bool immediate_ok)
1158 bool immediate_probe = false;
1160 write_lock_bh(&neigh->lock);
1163 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1168 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1169 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1170 NEIGH_VAR(neigh->parms, APP_PROBES)) {
1171 unsigned long next, now = jiffies;
1173 atomic_set(&neigh->probes,
1174 NEIGH_VAR(neigh->parms, UCAST_PROBES));
1175 neigh_del_timer(neigh);
1176 neigh->nud_state = NUD_INCOMPLETE;
1177 neigh->updated = now;
1178 if (!immediate_ok) {
1181 immediate_probe = true;
1182 next = now + max(NEIGH_VAR(neigh->parms,
1186 neigh_add_timer(neigh, next);
1188 neigh->nud_state = NUD_FAILED;
1189 neigh->updated = jiffies;
1190 write_unlock_bh(&neigh->lock);
1192 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
1195 } else if (neigh->nud_state & NUD_STALE) {
1196 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1197 neigh_del_timer(neigh);
1198 neigh->nud_state = NUD_DELAY;
1199 neigh->updated = jiffies;
1200 neigh_add_timer(neigh, jiffies +
1201 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1204 if (neigh->nud_state == NUD_INCOMPLETE) {
1206 while (neigh->arp_queue_len_bytes + skb->truesize >
1207 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1208 struct sk_buff *buff;
1210 buff = __skb_dequeue(&neigh->arp_queue);
1213 neigh->arp_queue_len_bytes -= buff->truesize;
1214 kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
1215 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1218 __skb_queue_tail(&neigh->arp_queue, skb);
1219 neigh->arp_queue_len_bytes += skb->truesize;
1224 if (immediate_probe)
1227 write_unlock(&neigh->lock);
1229 trace_neigh_event_send_done(neigh, rc);
1233 if (neigh->nud_state & NUD_STALE)
1235 write_unlock_bh(&neigh->lock);
1236 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
1237 trace_neigh_event_send_dead(neigh, 1);
1240 EXPORT_SYMBOL(__neigh_event_send);
/* Refresh the cached hardware header after the lladdr changed, via the
 * device's header_ops->cache_update, under the hh seqlock so readers on the
 * fast path see a consistent header.
 */
1242 static void neigh_update_hhs(struct neighbour *neigh)
1244 struct hh_cache *hh;
1245 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1248 if (neigh->dev->header_ops)
1249 update = neigh->dev->header_ops->cache_update;
1253 if (READ_ONCE(hh->hh_len)) {
1254 write_seqlock_bh(&hh->hh_lock);
1255 update(hh, neigh->dev, neigh->ha);
1256 write_sequnlock_bh(&hh->hh_lock);
1261 /* Generic update routine.
1262 -- lladdr is new lladdr or NULL, if it is not supplied.
1263 -- new is new state.
1265 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1267 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1268 lladdr instead of overriding it
1270 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1271 NEIGH_UPDATE_F_USE means that the entry is user triggered.
1272 NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
1273 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1275 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1278 Caller MUST hold reference count on the entry.
/* Core update routine — contract documented in the block comment above.
 * Under neigh->lock it: applies admin flag changes, handles the USE/MANAGED
 * fast path, tears down on transition to an invalid state, validates the
 * proposed lladdr against the cached one (honouring OVERRIDE /
 * WEAK_OVERRIDE), and on a real change updates timestamps, re-arms the
 * timer, rewrites the hardware address under ha_lock, refreshes cached
 * headers, and flushes the pending arp_queue through the (re-looked-up)
 * output path.  Finishes by syncing gc/managed list membership and sending
 * a netlink notification.  NOTE(review): many branch bodies and the 'out'
 * labels are elided in this listing.
 */
1280 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1281 u8 new, u32 flags, u32 nlmsg_pid,
1282 struct netlink_ext_ack *extack)
1284 bool gc_update = false, managed_update = false;
1285 int update_isrouter = 0;
1286 struct net_device *dev;
1287 int err, notify = 0;
1290 trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1292 write_lock_bh(&neigh->lock);
1295 old = neigh->nud_state;
1299 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1303 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1304 (old & (NUD_NOARP | NUD_PERMANENT)))
1307 neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
1308 if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
1309 new = old & ~NUD_PERMANENT;
1310 neigh->nud_state = new;
1315 if (!(new & NUD_VALID)) {
1316 neigh_del_timer(neigh);
1317 if (old & NUD_CONNECTED)
1318 neigh_suspect(neigh);
1319 neigh->nud_state = new;
1321 notify = old & NUD_VALID;
1322 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1323 (new & NUD_FAILED)) {
1324 neigh_invalidate(neigh);
1330 /* Compare new lladdr with cached one */
1331 if (!dev->addr_len) {
1332 /* First case: device needs no address. */
1334 } else if (lladdr) {
1335 /* The second case: if something is already cached
1336 and a new address is proposed:
1338 - if they are different, check override flag
1340 if ((old & NUD_VALID) &&
1341 !memcmp(lladdr, neigh->ha, dev->addr_len))
1344 /* No address is supplied; if we know something,
1345 use it, otherwise discard the request.
1348 if (!(old & NUD_VALID)) {
1349 NL_SET_ERR_MSG(extack, "No link layer address given");
1355 /* Update confirmed timestamp for neighbour entry after we
1356 * received ARP packet even if it doesn't change IP to MAC binding.
1358 if (new & NUD_CONNECTED)
1359 neigh->confirmed = jiffies;
1361 /* If entry was valid and address is not changed,
1362 do not change entry state, if new one is STALE.
1365 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1366 if (old & NUD_VALID) {
1367 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1368 update_isrouter = 0;
1369 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1370 (old & NUD_CONNECTED)) {
1376 if (lladdr == neigh->ha && new == NUD_STALE &&
1377 !(flags & NEIGH_UPDATE_F_ADMIN))
1382 /* Update timestamp only once we know we will make a change to the
1383 * neighbour entry. Otherwise we risk to move the locktime window with
1384 * noop updates and ignore relevant ARP updates.
1386 if (new != old || lladdr != neigh->ha)
1387 neigh->updated = jiffies;
1390 neigh_del_timer(neigh);
1391 if (new & NUD_PROBE)
1392 atomic_set(&neigh->probes, 0);
1393 if (new & NUD_IN_TIMER)
1394 neigh_add_timer(neigh, (jiffies +
1395 ((new & NUD_REACHABLE) ?
1396 neigh->parms->reachable_time :
1398 neigh->nud_state = new;
1402 if (lladdr != neigh->ha) {
1403 write_seqlock(&neigh->ha_lock);
1404 memcpy(&neigh->ha, lladdr, dev->addr_len);
1405 write_sequnlock(&neigh->ha_lock);
1406 neigh_update_hhs(neigh);
1407 if (!(new & NUD_CONNECTED))
1408 neigh->confirmed = jiffies -
1409 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1414 if (new & NUD_CONNECTED)
1415 neigh_connect(neigh);
1417 neigh_suspect(neigh);
1418 if (!(old & NUD_VALID)) {
1419 struct sk_buff *skb;
1421 /* Again: avoid dead loop if something went wrong */
1423 while (neigh->nud_state & NUD_VALID &&
1424 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1425 struct dst_entry *dst = skb_dst(skb);
1426 struct neighbour *n2, *n1 = neigh;
1427 write_unlock_bh(&neigh->lock);
1431 /* Why not just use 'neigh' as-is? The problem is that
1432 * things such as shaper, eql, and sch_teql can end up
1433 * using alternative, different, neigh objects to output
1434 * the packet in the output path. So what we need to do
1435 * here is re-lookup the top-level neigh in the path so
1436 * we can reinject the packet there.
1439 if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
1440 n2 = dst_neigh_lookup_skb(dst, skb);
1444 n1->output(n1, skb);
1449 write_lock_bh(&neigh->lock);
1451 __skb_queue_purge(&neigh->arp_queue);
1452 neigh->arp_queue_len_bytes = 0;
1455 if (update_isrouter)
1456 neigh_update_is_router(neigh, flags, &notify);
1457 write_unlock_bh(&neigh->lock);
1458 if (((new ^ old) & NUD_PERMANENT) || gc_update)
1459 neigh_update_gc_list(neigh);
1461 neigh_update_managed_list(neigh);
1463 neigh_update_notify(neigh, nlmsg_pid);
1464 trace_neigh_update_done(neigh, err);
/* neigh_update - public entry point for updating a neighbour entry.
 * Thin wrapper around __neigh_update() with no netlink extended ack.
 */
1468 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1469 u32 flags, u32 nlmsg_pid)
1471 return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1473 EXPORT_SYMBOL(neigh_update);
1475 /* Update the neigh to listen temporarily for probe responses, even if it is
1476 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1478 void __neigh_set_probe_once(struct neighbour *neigh)
1482 neigh->updated = jiffies;
/* Only a NUD_FAILED entry is revived here; other states are left alone
 * (the early return for the non-FAILED case is not visible in this view —
 * NOTE(review): confirm against the full source). */
1483 if (!(neigh->nud_state & NUD_FAILED))
1485 neigh->nud_state = NUD_INCOMPLETE;
/* Saturate the probe counter so no further probes are sent by the timer. */
1486 atomic_set(&neigh->probes, neigh_max_probes(neigh));
1487 neigh_add_timer(neigh,
1488 jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1491 EXPORT_SYMBOL(__neigh_set_probe_once);
/* neigh_event_ns - note that a neighbour solicitation was received from
 * @saddr/@lladdr on @dev. Looks up (and possibly creates) the entry and
 * marks it NUD_STALE with an admin-style override; returns the entry
 * (reference handling of the NULL-lookup case is elided in this view).
 */
1493 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1494 u8 *lladdr, void *saddr,
1495 struct net_device *dev)
/* Create the entry only when a link-layer address was supplied or the
 * device needs none (lladdr || !dev->addr_len). */
1497 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1498 lladdr || !dev->addr_len);
1500 neigh_update(neigh, lladdr, NUD_STALE,
1501 NEIGH_UPDATE_F_OVERRIDE, 0);
1504 EXPORT_SYMBOL(neigh_event_ns);
1506 /* called with read_lock_bh(&n->lock); */
/* neigh_hh_init - populate the cached hardware header (hh_cache) for @n
 * via the device's header_ops->cache() callback, under the neighbour's
 * write lock so only one thread initializes it. */
1507 static void neigh_hh_init(struct neighbour *n)
1509 struct net_device *dev = n->dev;
1510 __be16 prot = n->tbl->protocol;
1511 struct hh_cache *hh = &n->hh;
1513 write_lock_bh(&n->lock);
1515 /* Only one thread can come in here and initialize the
1519 dev->header_ops->cache(n, hh, prot);
1521 write_unlock_bh(&n->lock);
1524 /* Slow and careful. */
/* neigh_resolve_output - output path used while the neighbour is not yet
 * resolved. Triggers resolution via neigh_event_send(); once resolved,
 * builds the hardware header under the ha_lock seqlock (retrying if the
 * address changes mid-copy) and transmits with dev_queue_xmit().
 */
1526 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1530 if (!neigh_event_send(neigh, skb)) {
1532 struct net_device *dev = neigh->dev;
/* Lazily initialize the cached hardware header on first use. */
1535 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1536 neigh_hh_init(neigh);
1539 __skb_pull(skb, skb_network_offset(skb));
/* seqlock read loop: retry header construction if neigh->ha was
 * updated concurrently. */
1540 seq = read_seqbegin(&neigh->ha_lock);
1541 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1542 neigh->ha, NULL, skb->len);
1543 } while (read_seqretry(&neigh->ha_lock, seq));
1546 rc = dev_queue_xmit(skb);
1557 EXPORT_SYMBOL(neigh_resolve_output);
1559 /* As fast as possible without hh cache */
/* neigh_connected_output - output path for a NUD_CONNECTED neighbour:
 * no resolution step, just build the hardware header under the ha_lock
 * seqlock and transmit. */
1561 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1563 struct net_device *dev = neigh->dev;
1568 __skb_pull(skb, skb_network_offset(skb));
1569 seq = read_seqbegin(&neigh->ha_lock);
1570 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1571 neigh->ha, NULL, skb->len);
1572 } while (read_seqretry(&neigh->ha_lock, seq));
1575 err = dev_queue_xmit(skb);
1582 EXPORT_SYMBOL(neigh_connected_output);
/* neigh_direct_output - output path for devices that need no link-layer
 * header/resolution: hand the skb straight to the qdisc layer. */
1584 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1586 return dev_queue_xmit(skb);
1588 EXPORT_SYMBOL(neigh_direct_output);
/* neigh_managed_work - periodic worker that re-probes every entry on the
 * table's managed_list (NTF_MANAGED entries), then re-arms itself with
 * the per-table INTERVAL_PROBE_TIME_MS period. Runs under tbl->lock. */
1590 static void neigh_managed_work(struct work_struct *work)
1592 struct neigh_table *tbl = container_of(work, struct neigh_table,
1594 struct neighbour *neigh;
1596 write_lock_bh(&tbl->lock);
1597 list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1598 neigh_event_send_probe(neigh, NULL, false);
1599 queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1600 NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
1601 write_unlock_bh(&tbl->lock);
/* neigh_proxy_process - proxy-ARP delay timer. Walks the table's proxy
 * queue; entries whose scheduled time has arrived are unlinked, their
 * per-device qlen accounting decremented, and replayed via proxy_redo()
 * (or dropped if the device is down). The earliest not-yet-due entry
 * determines when the timer is re-armed. */
1604 static void neigh_proxy_process(struct timer_list *t)
1606 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1607 long sched_next = 0;
1608 unsigned long now = jiffies;
1609 struct sk_buff *skb, *n;
1611 spin_lock(&tbl->proxy_queue.lock);
1613 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
/* tdif <= 0 means this skb's scheduled time has passed. */
1614 long tdif = NEIGH_CB(skb)->sched_next - now;
1617 struct net_device *dev = skb->dev;
1618 struct in_device *in_dev;
1621 in_dev = __in_dev_get_rcu(dev);
1623 in_dev->arp_parms->qlen--;
1625 __skb_unlink(skb, &tbl->proxy_queue);
1627 if (tbl->proxy_redo && netif_running(dev)) {
1629 tbl->proxy_redo(skb);
/* Track the minimum remaining delay for the re-arm below. */
1636 } else if (!sched_next || tdif < sched_next)
1639 del_timer(&tbl->proxy_timer);
1641 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1642 spin_unlock(&tbl->proxy_queue.lock);
/* pneigh_enqueue - queue a proxy request skb for delayed processing.
 * The reply is jittered by a random delay up to PROXY_DELAY; requests
 * beyond PROXY_QLEN are dropped (drop path elided in this view). */
1645 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1646 struct sk_buff *skb)
1648 unsigned long sched_next = jiffies +
1649 prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));
1651 if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1656 NEIGH_CB(skb)->sched_next = sched_next;
1657 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1659 spin_lock(&tbl->proxy_queue.lock);
/* If the timer is already pending for an earlier deadline, keep it. */
1660 if (del_timer(&tbl->proxy_timer)) {
1661 if (time_before(tbl->proxy_timer.expires, sched_next))
1662 sched_next = tbl->proxy_timer.expires;
1666 __skb_queue_tail(&tbl->proxy_queue, skb);
1668 mod_timer(&tbl->proxy_timer, sched_next);
1669 spin_unlock(&tbl->proxy_queue.lock);
1671 EXPORT_SYMBOL(pneigh_enqueue);
/* lookup_neigh_parms - find the neigh_parms for (net, ifindex) on @tbl.
 * ifindex == 0 matches the table's default parms, but only in init_net.
 * Caller must hold tbl->lock (list walk of tbl->parms_list). */
1673 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1674 struct net *net, int ifindex)
1676 struct neigh_parms *p;
1678 list_for_each_entry(p, &tbl->parms_list, list) {
1679 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1680 (!p->dev && !ifindex && net_eq(net, &init_net)))
/* neigh_parms_alloc - create per-device neighbour parameters for @dev by
 * cloning the table defaults. Takes a tracked reference on the device,
 * gives the driver a chance to veto via ndo_neigh_setup(), and links the
 * new parms into tbl->parms_list under tbl->lock.
 * Returns the new parms (NULL on failure; failure paths elided here). */
1687 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1688 struct neigh_table *tbl)
1690 struct neigh_parms *p;
1691 struct net *net = dev_net(dev);
1692 const struct net_device_ops *ops = dev->netdev_ops;
/* Clone the table-wide defaults as the starting point. */
1694 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1697 refcount_set(&p->refcnt, 1);
1699 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1701 netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
1703 write_pnet(&p->net, net);
1704 p->sysctl_table = NULL;
/* Let the driver adjust/veto the parms; undo the device hold on veto. */
1706 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1707 netdev_put(dev, &p->dev_tracker);
1712 write_lock_bh(&tbl->lock);
1713 list_add(&p->list, &tbl->parms.list);
1714 write_unlock_bh(&tbl->lock);
1716 neigh_parms_data_state_cleanall(p);
1720 EXPORT_SYMBOL(neigh_parms_alloc);
/* neigh_rcu_free_parms - RCU callback dropping the final list reference
 * on a neigh_parms after readers have finished. */
1722 static void neigh_rcu_free_parms(struct rcu_head *head)
1724 struct neigh_parms *parms =
1725 container_of(head, struct neigh_parms, rcu_head);
1727 neigh_parms_put(parms);
/* neigh_parms_release - unlink per-device parms from the table and free
 * them after an RCU grace period. The table's own default parms are
 * never released this way. */
1730 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1732 if (!parms || parms == &tbl->parms)
1734 write_lock_bh(&tbl->lock);
1735 list_del(&parms->list);
1737 write_unlock_bh(&tbl->lock);
1738 netdev_put(parms->dev, &parms->dev_tracker);
/* Defer the actual free until concurrent RCU readers are done. */
1739 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1741 EXPORT_SYMBOL(neigh_parms_release);
1743 static void neigh_parms_destroy(struct neigh_parms *parms)
/* Lockdep class distinguishing per-table proxy-queue locks. */
1748 static struct lock_class_key neigh_table_proxy_queue_class;
/* Global registry of neighbour tables, indexed by NEIGH_*_TABLE. */
1750 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
/* neigh_table_init - initialize a protocol's neighbour table and register
 * it in neigh_tables[index]: lists, default parms, per-CPU stats, proc
 * entry, hash tables, locks, GC/managed workers and the proxy timer.
 * Allocation failures here panic — this runs at boot/module init. */
1752 void neigh_table_init(int index, struct neigh_table *tbl)
1754 unsigned long now = jiffies;
1755 unsigned long phsize;
1757 INIT_LIST_HEAD(&tbl->parms_list);
1758 INIT_LIST_HEAD(&tbl->gc_list);
1759 INIT_LIST_HEAD(&tbl->managed_list);
1761 list_add(&tbl->parms.list, &tbl->parms_list);
1762 write_pnet(&tbl->parms.net, &init_net);
1763 refcount_set(&tbl->parms.refcnt, 1);
1764 tbl->parms.reachable_time =
1765 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1766 tbl->parms.qlen = 0;
1768 tbl->stats = alloc_percpu(struct neigh_statistics);
1770 panic("cannot create neighbour cache statistics");
1772 #ifdef CONFIG_PROC_FS
1773 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1774 &neigh_stat_seq_ops, tbl))
1775 panic("cannot create neighbour proc dir entry");
/* Start with a 2^3-bucket neighbour hash; it grows on demand. */
1778 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1780 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1781 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1783 if (!tbl->nht || !tbl->phash_buckets)
1784 panic("cannot allocate neighbour cache hashes");
/* Entry size covers the neighbour struct through the protocol key,
 * rounded up so the per-protocol private area stays aligned. */
1786 if (!tbl->entry_size)
1787 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1788 tbl->key_len, NEIGH_PRIV_ALIGN);
1790 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1792 rwlock_init(&tbl->lock);
1794 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1795 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1796 tbl->parms.reachable_time);
1797 INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1798 queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1800 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1801 skb_queue_head_init_class(&tbl->proxy_queue,
1802 &neigh_table_proxy_queue_class);
1804 tbl->last_flush = now;
1805 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1807 neigh_tables[index] = tbl;
1809 EXPORT_SYMBOL(neigh_table_init);
/* neigh_table_clear - tear down a neighbour table on protocol unload:
 * unregister it, cancel workers/timers, purge queues and entries, then
 * free the hash tables, proc entry and per-CPU stats. A leak of live
 * entries is only logged, not recovered. */
1811 int neigh_table_clear(int index, struct neigh_table *tbl)
1813 neigh_tables[index] = NULL;
1814 /* It is not clean... Fix it to unload IPv6 module safely */
1815 cancel_delayed_work_sync(&tbl->managed_work);
1816 cancel_delayed_work_sync(&tbl->gc_work);
1817 del_timer_sync(&tbl->proxy_timer);
1818 pneigh_queue_purge(&tbl->proxy_queue, NULL);
1819 neigh_ifdown(tbl, NULL);
1820 if (atomic_read(&tbl->entries))
1821 pr_crit("neighbour leakage\n");
1823 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1824 neigh_hash_free_rcu);
1827 kfree(tbl->phash_buckets);
1828 tbl->phash_buckets = NULL;
1830 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1832 free_percpu(tbl->stats);
1837 EXPORT_SYMBOL(neigh_table_clear);
/* neigh_find_table - map an address family to its registered neighbour
 * table (ARP/ND/DECnet); returns NULL for unsupported families. The
 * switch statement's case labels are elided in this view. */
1839 static struct neigh_table *neigh_find_table(int family)
1841 struct neigh_table *tbl = NULL;
1845 tbl = neigh_tables[NEIGH_ARP_TABLE];
1848 tbl = neigh_tables[NEIGH_ND_TABLE];
1851 tbl = neigh_tables[NEIGH_DN_TABLE];
/* Netlink attribute validation policy for RTM_*NEIGH requests.
 * Strict validation starts at NDA_NH_ID; older attributes keep the
 * deprecated lenient parsing for compatibility. */
1858 const struct nla_policy nda_policy[NDA_MAX+1] = {
1859 [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID },
1860 [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1861 [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1862 [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
1863 [NDA_PROBES] = { .type = NLA_U32 },
1864 [NDA_VLAN] = { .type = NLA_U16 },
1865 [NDA_PORT] = { .type = NLA_U16 },
1866 [NDA_VNI] = { .type = NLA_U32 },
1867 [NDA_IFINDEX] = { .type = NLA_U32 },
1868 [NDA_MASTER] = { .type = NLA_U32 },
1869 [NDA_PROTOCOL] = { .type = NLA_U8 },
1870 [NDA_NH_ID] = { .type = NLA_U32 },
1871 [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
1872 [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
/* neigh_delete - RTM_DELNEIGH handler. Validates the request, resolves
 * device and table, and either removes a proxy entry (NTF_PROXY) or
 * forces the neighbour to NUD_FAILED and unlinks it from the table. */
1875 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1876 struct netlink_ext_ack *extack)
1878 struct net *net = sock_net(skb->sk);
1880 struct nlattr *dst_attr;
1881 struct neigh_table *tbl;
1882 struct neighbour *neigh;
1883 struct net_device *dev = NULL;
1887 if (nlmsg_len(nlh) < sizeof(*ndm))
1890 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1892 NL_SET_ERR_MSG(extack, "Network address not specified");
1896 ndm = nlmsg_data(nlh);
1897 if (ndm->ndm_ifindex) {
1898 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1905 tbl = neigh_find_table(ndm->ndm_family);
1907 return -EAFNOSUPPORT;
1909 if (nla_len(dst_attr) < (int)tbl->key_len) {
1910 NL_SET_ERR_MSG(extack, "Invalid network address");
/* Proxy entries live in the pneigh hash and are deleted separately. */
1914 if (ndm->ndm_flags & NTF_PROXY) {
1915 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1922 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1923 if (neigh == NULL) {
/* "Delete" = admin override to NUD_FAILED, then unlink under tbl->lock. */
1928 err = __neigh_update(neigh, NULL, NUD_FAILED,
1929 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1930 NETLINK_CB(skb).portid, extack);
1931 write_lock_bh(&tbl->lock);
1932 neigh_release(neigh);
1933 neigh_remove_one(neigh, tbl);
1934 write_unlock_bh(&tbl->lock);
/* neigh_add - RTM_NEWNEIGH handler. Parses attributes (including the
 * extended NDA_FLAGS_EXT flag word), resolves device and table, then
 * either installs a proxy entry (NTF_PROXY) or creates/updates a real
 * neighbour via ___neigh_create()/__neigh_update(), honoring the
 * NLM_F_CREATE/EXCL/REPLACE semantics. */
1940 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1941 struct netlink_ext_ack *extack)
1943 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1944 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1945 struct net *net = sock_net(skb->sk);
1947 struct nlattr *tb[NDA_MAX+1];
1948 struct neigh_table *tbl;
1949 struct net_device *dev = NULL;
1950 struct neighbour *neigh;
1957 err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
1958 nda_policy, extack);
1964 NL_SET_ERR_MSG(extack, "Network address not specified");
1968 ndm = nlmsg_data(nlh);
1969 ndm_flags = ndm->ndm_flags;
/* Merge the 32-bit extended flags above the legacy 8-bit ndm_flags. */
1970 if (tb[NDA_FLAGS_EXT]) {
1971 u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
/* Compile-time proof that neigh->flags can hold both flag words. */
1973 BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
1974 (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
1975 hweight32(NTF_EXT_MASK)));
1976 ndm_flags |= (ext << NTF_EXT_SHIFT);
1978 if (ndm->ndm_ifindex) {
1979 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1985 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1986 NL_SET_ERR_MSG(extack, "Invalid link address");
1991 tbl = neigh_find_table(ndm->ndm_family);
1993 return -EAFNOSUPPORT;
1995 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
1996 NL_SET_ERR_MSG(extack, "Invalid network address");
2000 dst = nla_data(tb[NDA_DST]);
2001 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
2003 if (tb[NDA_PROTOCOL])
2004 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
/* Proxy entries: NTF_MANAGED makes no sense here, reject the combo. */
2005 if (ndm_flags & NTF_PROXY) {
2006 struct pneigh_entry *pn;
2008 if (ndm_flags & NTF_MANAGED) {
2009 NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
2014 pn = pneigh_lookup(tbl, net, dst, dev, 1);
2016 pn->flags = ndm_flags;
2018 pn->protocol = protocol;
2025 NL_SET_ERR_MSG(extack, "Device not specified");
2029 if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
2034 neigh = neigh_lookup(tbl, dst, dev);
2035 if (neigh == NULL) {
/* Permanent and externally-learned entries are exempt from GC. */
2036 bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
2037 bool exempt_from_gc = ndm_permanent ||
2038 ndm_flags & NTF_EXT_LEARNED;
2040 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
2044 if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
2045 NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
2050 neigh = ___neigh_create(tbl, dst, dev,
2052 (NTF_EXT_LEARNED | NTF_MANAGED),
2053 exempt_from_gc, true);
2054 if (IS_ERR(neigh)) {
2055 err = PTR_ERR(neigh);
2059 if (nlh->nlmsg_flags & NLM_F_EXCL) {
2061 neigh_release(neigh);
/* Without NLM_F_REPLACE an existing entry may not be overridden. */
2065 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
2066 flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
2067 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
2071 neigh->protocol = protocol;
2072 if (ndm_flags & NTF_EXT_LEARNED)
2073 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
2074 if (ndm_flags & NTF_ROUTER)
2075 flags |= NEIGH_UPDATE_F_ISROUTER;
2076 if (ndm_flags & NTF_MANAGED)
2077 flags |= NEIGH_UPDATE_F_MANAGED;
2078 if (ndm_flags & NTF_USE)
2079 flags |= NEIGH_UPDATE_F_USE;
2081 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
2082 NETLINK_CB(skb).portid, extack);
/* NTF_USE/NTF_MANAGED entries get an immediate resolution kick. */
2083 if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
2084 neigh_event_send(neigh, NULL);
2087 neigh_release(neigh);
/* neightbl_fill_parms - emit one neigh_parms as a nested NDTA_PARMS
 * attribute set. Returns the nest-end result, or a negative value via
 * the (elided) nla_put_failure path after cancelling the nest. */
2092 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2094 struct nlattr *nest;
2096 nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2101 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2102 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2103 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2104 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2105 /* approximative value for deprecated QUEUE_LEN (in packets) */
2106 nla_put_u32(skb, NDTPA_QUEUE_LEN,
2107 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2108 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2109 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2110 nla_put_u32(skb, NDTPA_UCAST_PROBES,
2111 NEIGH_VAR(parms, UCAST_PROBES)) ||
2112 nla_put_u32(skb, NDTPA_MCAST_PROBES,
2113 NEIGH_VAR(parms, MCAST_PROBES)) ||
2114 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2115 NEIGH_VAR(parms, MCAST_REPROBES)) ||
2116 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2118 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2119 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2120 nla_put_msecs(skb, NDTPA_GC_STALETIME,
2121 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2122 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2123 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2124 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2125 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2126 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2127 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2128 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2129 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2130 nla_put_msecs(skb, NDTPA_LOCKTIME,
2131 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2132 nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2133 NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2134 goto nla_put_failure;
2135 return nla_nest_end(skb, nest);
2138 nla_nest_cancel(skb, nest);
/* neightbl_fill_info - build one RTM_NEWNEIGHTBL message describing a
 * whole table: GC thresholds, NDTA_CONFIG snapshot, aggregated per-CPU
 * NDTA_STATS, and the table's default parms. Holds tbl->lock (read)
 * while sampling so the values are mutually consistent. */
2142 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2143 u32 pid, u32 seq, int type, int flags)
2145 struct nlmsghdr *nlh;
2146 struct ndtmsg *ndtmsg;
2148 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2152 ndtmsg = nlmsg_data(nlh);
2154 read_lock_bh(&tbl->lock);
2155 ndtmsg->ndtm_family = tbl->family;
2156 ndtmsg->ndtm_pad1 = 0;
2157 ndtmsg->ndtm_pad2 = 0;
2159 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2160 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2161 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2162 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2163 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2164 goto nla_put_failure;
/* Snapshot of table configuration, exported as one NDTA_CONFIG blob. */
2166 unsigned long now = jiffies;
2167 long flush_delta = now - tbl->last_flush;
2168 long rand_delta = now - tbl->last_rand;
2169 struct neigh_hash_table *nht;
2170 struct ndt_config ndc = {
2171 .ndtc_key_len = tbl->key_len,
2172 .ndtc_entry_size = tbl->entry_size,
2173 .ndtc_entries = atomic_read(&tbl->entries),
2174 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
2175 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
2176 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
/* Hash parameters are read under RCU since nht may be resized. */
2180 nht = rcu_dereference_bh(tbl->nht);
2181 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2182 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2183 rcu_read_unlock_bh();
2185 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2186 goto nla_put_failure;
/* Aggregate the per-CPU counters into a single NDTA_STATS record. */
2191 struct ndt_stats ndst;
2193 memset(&ndst, 0, sizeof(ndst));
2195 for_each_possible_cpu(cpu) {
2196 struct neigh_statistics *st;
2198 st = per_cpu_ptr(tbl->stats, cpu);
2199 ndst.ndts_allocs += st->allocs;
2200 ndst.ndts_destroys += st->destroys;
2201 ndst.ndts_hash_grows += st->hash_grows;
2202 ndst.ndts_res_failed += st->res_failed;
2203 ndst.ndts_lookups += st->lookups;
2204 ndst.ndts_hits += st->hits;
2205 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
2206 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
2207 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
2208 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
2209 ndst.ndts_table_fulls += st->table_fulls;
2212 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2214 goto nla_put_failure;
/* The table's default parms must never be device-bound. */
2217 BUG_ON(tbl->parms.dev);
2218 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2219 goto nla_put_failure;
2221 read_unlock_bh(&tbl->lock);
2222 nlmsg_end(skb, nlh);
2226 read_unlock_bh(&tbl->lock);
2227 nlmsg_cancel(skb, nlh);
/* neightbl_fill_param_info - like neightbl_fill_info() but emits only a
 * single (per-device) parms set for the table, used when dumping
 * additional parms entries beyond the table default. */
2231 static int neightbl_fill_param_info(struct sk_buff *skb,
2232 struct neigh_table *tbl,
2233 struct neigh_parms *parms,
2234 u32 pid, u32 seq, int type,
2237 struct ndtmsg *ndtmsg;
2238 struct nlmsghdr *nlh;
2240 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2244 ndtmsg = nlmsg_data(nlh);
2246 read_lock_bh(&tbl->lock);
2247 ndtmsg->ndtm_family = tbl->family;
2248 ndtmsg->ndtm_pad1 = 0;
2249 ndtmsg->ndtm_pad2 = 0;
2251 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2252 neightbl_fill_parms(skb, parms) < 0)
2255 read_unlock_bh(&tbl->lock);
2256 nlmsg_end(skb, nlh);
2259 read_unlock_bh(&tbl->lock);
2260 nlmsg_cancel(skb, nlh);
/* Validation policy for top-level RTM_SETNEIGHTBL attributes. */
2264 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2265 [NDTA_NAME] = { .type = NLA_STRING },
2266 [NDTA_THRESH1] = { .type = NLA_U32 },
2267 [NDTA_THRESH2] = { .type = NLA_U32 },
2268 [NDTA_THRESH3] = { .type = NLA_U32 },
2269 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2270 [NDTA_PARMS] = { .type = NLA_NESTED },
/* Validation policy for the nested NDTA_PARMS attribute set. */
2273 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2274 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2275 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2276 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2277 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2278 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2279 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2280 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2281 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2282 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2283 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2284 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2285 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2286 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2287 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
2288 [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 },
/* neightbl_set - RTM_SETNEIGHTBL handler. Finds the table by NDTA_NAME
 * (and optional family), then under tbl->lock applies any nested
 * NDTPA_* parameter updates to the parms selected by NDTPA_IFINDEX and
 * any table-wide GC thresholds/interval (init_net only for the latter). */
2291 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2292 struct netlink_ext_ack *extack)
2294 struct net *net = sock_net(skb->sk);
2295 struct neigh_table *tbl;
2296 struct ndtmsg *ndtmsg;
2297 struct nlattr *tb[NDTA_MAX+1];
2301 err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2302 nl_neightbl_policy, extack);
2306 if (tb[NDTA_NAME] == NULL) {
2311 ndtmsg = nlmsg_data(nlh);
/* Locate the table by name, optionally constrained by family. */
2313 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2314 tbl = neigh_tables[tidx];
2317 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2319 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2329 * We acquire tbl->lock to be nice to the periodic timers and
2330 * make sure they always see a consistent set of values.
2332 write_lock_bh(&tbl->lock);
2334 if (tb[NDTA_PARMS]) {
2335 struct nlattr *tbp[NDTPA_MAX+1];
2336 struct neigh_parms *p;
2339 err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2341 nl_ntbl_parm_policy, extack);
2343 goto errout_tbl_lock;
2345 if (tbp[NDTPA_IFINDEX])
2346 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2348 p = lookup_neigh_parms(tbl, net, ifindex);
2351 goto errout_tbl_lock;
/* Apply each present NDTPA_* attribute to the selected parms. */
2354 for (i = 1; i <= NDTPA_MAX; i++) {
2359 case NDTPA_QUEUE_LEN:
/* Deprecated packet count; converted to bytes. */
2360 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2361 nla_get_u32(tbp[i]) *
2362 SKB_TRUESIZE(ETH_FRAME_LEN));
2364 case NDTPA_QUEUE_LENBYTES:
2365 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2366 nla_get_u32(tbp[i]));
2368 case NDTPA_PROXY_QLEN:
2369 NEIGH_VAR_SET(p, PROXY_QLEN,
2370 nla_get_u32(tbp[i]));
2372 case NDTPA_APP_PROBES:
2373 NEIGH_VAR_SET(p, APP_PROBES,
2374 nla_get_u32(tbp[i]));
2376 case NDTPA_UCAST_PROBES:
2377 NEIGH_VAR_SET(p, UCAST_PROBES,
2378 nla_get_u32(tbp[i]));
2380 case NDTPA_MCAST_PROBES:
2381 NEIGH_VAR_SET(p, MCAST_PROBES,
2382 nla_get_u32(tbp[i]));
2384 case NDTPA_MCAST_REPROBES:
2385 NEIGH_VAR_SET(p, MCAST_REPROBES,
2386 nla_get_u32(tbp[i]));
2388 case NDTPA_BASE_REACHABLE_TIME:
2389 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2390 nla_get_msecs(tbp[i]));
2391 /* update reachable_time as well, otherwise, the change will
2392 * only be effective after the next time neigh_periodic_work
2393 * decides to recompute it (can be multiple minutes)
2396 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2398 case NDTPA_GC_STALETIME:
2399 NEIGH_VAR_SET(p, GC_STALETIME,
2400 nla_get_msecs(tbp[i]));
2402 case NDTPA_DELAY_PROBE_TIME:
2403 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2404 nla_get_msecs(tbp[i]));
/* Notify interested parties (e.g. switchdev drivers) of the change. */
2405 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2407 case NDTPA_INTERVAL_PROBE_TIME_MS:
2408 NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
2409 nla_get_msecs(tbp[i]));
2411 case NDTPA_RETRANS_TIME:
2412 NEIGH_VAR_SET(p, RETRANS_TIME,
2413 nla_get_msecs(tbp[i]));
2415 case NDTPA_ANYCAST_DELAY:
2416 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2417 nla_get_msecs(tbp[i]));
2419 case NDTPA_PROXY_DELAY:
2420 NEIGH_VAR_SET(p, PROXY_DELAY,
2421 nla_get_msecs(tbp[i]));
2423 case NDTPA_LOCKTIME:
2424 NEIGH_VAR_SET(p, LOCKTIME,
2425 nla_get_msecs(tbp[i]));
/* Table-wide GC knobs may only be changed from the initial netns. */
2432 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2433 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2434 !net_eq(net, &init_net))
2435 goto errout_tbl_lock;
2437 if (tb[NDTA_THRESH1])
2438 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2440 if (tb[NDTA_THRESH2])
2441 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2443 if (tb[NDTA_THRESH3])
2444 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2446 if (tb[NDTA_GC_INTERVAL])
2447 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2452 write_unlock_bh(&tbl->lock);
/* neightbl_valid_dump_info - strict-mode validation of an
 * RTM_GETNEIGHTBL dump request header: correct size, zero padding, and
 * no trailing attributes. */
2457 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2458 struct netlink_ext_ack *extack)
2460 struct ndtmsg *ndtm;
2462 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2463 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2467 ndtm = nlmsg_data(nlh);
2468 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2469 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2473 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2474 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
/* neightbl_dump_info - RTM_GETNEIGHTBL dump handler. For each table
 * (resuming at cb->args[0]/[1]) emits the table info followed by its
 * per-device parms entries belonging to the requesting netns. */
2481 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2483 const struct nlmsghdr *nlh = cb->nlh;
2484 struct net *net = sock_net(skb->sk);
2485 int family, tidx, nidx = 0;
2486 int tbl_skip = cb->args[0];
2487 int neigh_skip = cb->args[1];
2488 struct neigh_table *tbl;
2490 if (cb->strict_check) {
2491 int err = neightbl_valid_dump_info(nlh, cb->extack);
2497 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2499 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2500 struct neigh_parms *p;
2502 tbl = neigh_tables[tidx];
2506 if (tidx < tbl_skip || (family && tbl->family != family))
2509 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2510 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
/* Skip the default parms (already emitted with the table itself) and
 * walk the remaining per-device entries. */
2515 p = list_next_entry(&tbl->parms, list);
2516 list_for_each_entry_from(p, &tbl->parms_list, list) {
2517 if (!net_eq(neigh_parms_net(p), net))
2520 if (nidx < neigh_skip)
2523 if (neightbl_fill_param_info(skb, tbl, p,
2524 NETLINK_CB(cb->skb).portid,
/* neigh_fill_info - serialize one neighbour entry into an RTM_*NEIGH
 * netlink message: ndmsg header, NDA_DST key, NDA_LLADDR snapshot (if
 * valid), cache info timestamps, probe count, protocol and extended
 * flags. Samples volatile state under neigh->lock. */
2542 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2543 u32 pid, u32 seq, int type, unsigned int flags)
2545 u32 neigh_flags, neigh_flags_ext;
2546 unsigned long now = jiffies;
2547 struct nda_cacheinfo ci;
2548 struct nlmsghdr *nlh;
2551 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
/* Split the combined flag word back into legacy and extended parts. */
2555 neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2556 neigh_flags = neigh->flags & NTF_OLD_MASK;
2558 ndm = nlmsg_data(nlh);
2559 ndm->ndm_family = neigh->ops->family;
2562 ndm->ndm_flags = neigh_flags;
2563 ndm->ndm_type = neigh->type;
2564 ndm->ndm_ifindex = neigh->dev->ifindex;
2566 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2567 goto nla_put_failure;
2569 read_lock_bh(&neigh->lock);
2570 ndm->ndm_state = neigh->nud_state;
2571 if (neigh->nud_state & NUD_VALID) {
2572 char haddr[MAX_ADDR_LEN];
/* Consistent copy of the hardware address via the ha_lock seqlock. */
2574 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2575 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2576 read_unlock_bh(&neigh->lock);
2577 goto nla_put_failure;
/* Ages are reported relative to now, in clock_t units. */
2581 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2582 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2583 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2584 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2585 read_unlock_bh(&neigh->lock);
2587 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2588 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2589 goto nla_put_failure;
2591 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2592 goto nla_put_failure;
2593 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2594 goto nla_put_failure;
2596 nlmsg_end(skb, nlh);
2600 nlmsg_cancel(skb, nlh);
/* pneigh_fill_info - serialize one proxy (pneigh) entry. Proxy entries
 * have no hardware address or timers; they are reported with NTF_PROXY
 * set and state NUD_NONE. */
2604 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2605 u32 pid, u32 seq, int type, unsigned int flags,
2606 struct neigh_table *tbl)
2608 u32 neigh_flags, neigh_flags_ext;
2609 struct nlmsghdr *nlh;
2612 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2616 neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
2617 neigh_flags = pn->flags & NTF_OLD_MASK;
2619 ndm = nlmsg_data(nlh);
2620 ndm->ndm_family = tbl->family;
2623 ndm->ndm_flags = neigh_flags | NTF_PROXY;
2624 ndm->ndm_type = RTN_UNICAST;
2625 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2626 ndm->ndm_state = NUD_NONE;
2628 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2629 goto nla_put_failure;
2631 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2632 goto nla_put_failure;
2633 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2634 goto nla_put_failure;
2636 nlmsg_end(skb, nlh);
2640 nlmsg_cancel(skb, nlh);
/* neigh_update_notify - broadcast a neighbour change: first to in-kernel
 * netevent subscribers, then to userspace via an RTM_NEWNEIGH message. */
2644 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2646 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2647 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
/* neigh_master_filtered - dump filter on NDA_MASTER: returns true when
 * @dev should be skipped. master_idx == -1 means "no master" (0 is
 * reserved for "attribute absent"). */
2650 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2652 struct net_device *master;
2657 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2659 /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
2660 * invalid value for ifindex to denote "no master".
2662 if (master_idx == -1)
2665 if (!master || master->ifindex != master_idx)
/* neigh_ifindex_filtered - dump filter on NDA_IFINDEX: true when the
 * entry's device does not match the requested ifindex (0 = no filter). */
2671 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2673 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2679 struct neigh_dump_filter {
/* neigh_dump_table - dump all neighbour entries of one table into @skb,
 * walking the RCU-protected hash buckets, resuming from cb->args[1]
 * (bucket) and cb->args[2] (index), applying device/master filters. */
2684 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2685 struct netlink_callback *cb,
2686 struct neigh_dump_filter *filter)
2688 struct net *net = sock_net(skb->sk);
2689 struct neighbour *n;
2690 int rc, h, s_h = cb->args[1];
2691 int idx, s_idx = idx = cb->args[2];
2692 struct neigh_hash_table *nht;
2693 unsigned int flags = NLM_F_MULTI;
2695 if (filter->dev_idx || filter->master_idx)
2696 flags |= NLM_F_DUMP_FILTERED;
2699 nht = rcu_dereference_bh(tbl->nht);
2701 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2704 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2706 n = rcu_dereference_bh(n->next)) {
/* Skip already-dumped entries and entries from other netns. */
2707 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2709 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2710 neigh_master_filtered(n->dev, filter->master_idx))
2712 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2725 rcu_read_unlock_bh();
/* pneigh_dump_table - dump all proxy entries of one table, walking the
 * pneigh hash under tbl->lock, resuming from cb->args[3]/[4], applying
 * the same device/master filters as the neighbour dump. */
2731 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2732 struct netlink_callback *cb,
2733 struct neigh_dump_filter *filter)
2735 struct pneigh_entry *n;
2736 struct net *net = sock_net(skb->sk);
2737 int rc, h, s_h = cb->args[3];
2738 int idx, s_idx = idx = cb->args[4];
2739 unsigned int flags = NLM_F_MULTI;
2741 if (filter->dev_idx || filter->master_idx)
2742 flags |= NLM_F_DUMP_FILTERED;
2744 read_lock_bh(&tbl->lock);
2746 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2749 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2750 if (idx < s_idx || pneigh_net(n) != net)
2752 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2753 neigh_master_filtered(n->dev, filter->master_idx))
2755 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2757 RTM_NEWNEIGH, flags, tbl) < 0) {
/* Message did not fit: drop the lock and let the caller resume. */
2758 read_unlock_bh(&tbl->lock);
2767 read_unlock_bh(&tbl->lock);
/* Validate an RTM_GETNEIGH dump request header and attributes, filling
 * *filter with NDA_IFINDEX / NDA_MASTER values.  Under strict checking
 * all unused ndmsg fields must be zero and only NTF_PROXY may be set in
 * ndm_flags; unknown attributes are rejected.
 */
2776 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2778 struct neigh_dump_filter *filter,
2779 struct netlink_ext_ack *extack)
2781 struct nlattr *tb[NDA_MAX + 1];
2787 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2788 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2792 ndm = nlmsg_data(nlh);
/* Reserved/unused header fields must be zero for a valid dump request. */
2793 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2794 ndm->ndm_state || ndm->ndm_type) {
2795 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2799 if (ndm->ndm_flags & ~NTF_PROXY) {
2800 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request")
2804 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2805 tb, NDA_MAX, nda_policy,
/* Non-strict path: legacy parse that tolerates unknown attributes. */
2808 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2809 NDA_MAX, nda_policy, extack);
2814 for (i = 0; i <= NDA_MAX; ++i) {
2818 /* all new attributes should require strict_check */
2821 filter->dev_idx = nla_get_u32(tb[i]);
2824 filter->master_idx = nla_get_u32(tb[i]);
2828 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
/* Top-level RTM_GETNEIGH dump handler: iterates all neighbour tables,
 * dispatching each to pneigh_dump_table() (NTF_PROXY requests) or
 * neigh_dump_table(), with optional per-family filtering.
 */
2837 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2839 const struct nlmsghdr *nlh = cb->nlh;
2840 struct neigh_dump_filter filter = {};
2841 struct neigh_table *tbl;
2846 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2848 /* check for full ndmsg structure presence, family member is
2849 * the same for both structures
2851 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2852 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
/* Validation errors are fatal only under strict checking. */
2855 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2856 if (err < 0 && cb->strict_check)
2861 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2862 tbl = neigh_tables[t];
2866 if (t < s_t || (family && tbl->family != family))
/* Reset per-table resume state (args[1..]) when moving to a new table. */
2869 memset(&cb->args[1], 0, sizeof(cb->args) -
2870 sizeof(cb->args[0]));
2872 err = pneigh_dump_table(tbl, skb, cb, &filter);
2874 err = neigh_dump_table(tbl, skb, cb, &filter);
/* Validate an RTM_GETNEIGH (non-dump) request, returning the target
 * table, destination address pointer, ifindex and ndm_flags via the out
 * parameters.  The NDA_DST attribute length must match the table's
 * key_len.
 */
2883 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2884 struct neigh_table **tbl,
2885 void **dst, int *dev_idx, u8 *ndm_flags,
2886 struct netlink_ext_ack *extack)
2888 struct nlattr *tb[NDA_MAX + 1];
2892 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2893 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2897 ndm = nlmsg_data(nlh);
2898 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2900 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2904 if (ndm->ndm_flags & ~NTF_PROXY) {
2905 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
/* Get requests are always parsed strictly, unlike dumps. */
2909 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2910 NDA_MAX, nda_policy, extack);
2914 *ndm_flags = ndm->ndm_flags;
2915 *dev_idx = ndm->ndm_ifindex;
2916 *tbl = neigh_find_table(ndm->ndm_family);
2918 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2919 return -EAFNOSUPPORT;
2922 for (i = 0; i <= NDA_MAX; ++i) {
/* NDA_DST payload must be exactly the table's address key length. */
2928 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2929 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2932 *dst = nla_data(tb[i]);
2935 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
/* Worst-case netlink message size for one neighbour entry, used to size
 * reply/notification skbs (see neigh_get_reply() and __neigh_notify()).
 * Must be kept in sync with neigh_fill_info() — it WARNs on -EMSGSIZE.
 */
2943 static inline size_t neigh_nlmsg_size(void)
2945 return NLMSG_ALIGN(sizeof(struct ndmsg))
2946 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2947 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2948 + nla_total_size(sizeof(struct nda_cacheinfo))
2949 + nla_total_size(4) /* NDA_PROBES */
2950 + nla_total_size(4) /* NDA_FLAGS_EXT */
2951 + nla_total_size(1); /* NDA_PROTOCOL */
/* Build and unicast an RTM_NEWNEIGH reply for @neigh to netlink @pid. */
2954 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2957 struct sk_buff *skb;
2960 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2964 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2970 err = rtnl_unicast(skb, net, pid);
/* Worst-case netlink message size for one proxy-neighbour entry; must
 * stay in sync with pneigh_fill_info().
 */
2975 static inline size_t pneigh_nlmsg_size(void)
2977 return NLMSG_ALIGN(sizeof(struct ndmsg))
2978 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2979 + nla_total_size(4) /* NDA_FLAGS_EXT */
2980 + nla_total_size(1); /* NDA_PROTOCOL */
/* Proxy-neighbour counterpart of neigh_get_reply(): build and unicast an
 * RTM_NEWNEIGH reply for @neigh from table @tbl.
 */
2983 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2984 u32 pid, u32 seq, struct neigh_table *tbl)
2986 struct sk_buff *skb;
2989 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2993 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2999 err = rtnl_unicast(skb, net, pid);
/* RTM_GETNEIGH doit handler: look up a single (p)neighbour entry and
 * unicast it back to the requester.  NTF_PROXY requests consult the
 * pneigh table; otherwise a device is mandatory and the normal
 * neighbour table is searched.
 */
3004 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3005 struct netlink_ext_ack *extack)
3007 struct net *net = sock_net(in_skb->sk);
3008 struct net_device *dev = NULL;
3009 struct neigh_table *tbl = NULL;
3010 struct neighbour *neigh;
3016 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
3022 dev = __dev_get_by_index(net, dev_idx);
3024 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3030 NL_SET_ERR_MSG(extack, "Network address not specified");
3034 if (ndm_flags & NTF_PROXY) {
3035 struct pneigh_entry *pn;
/* creat == 0: lookup only, never create a proxy entry here. */
3037 pn = pneigh_lookup(tbl, net, dst, dev, 0);
3039 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3042 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
3043 nlh->nlmsg_seq, tbl);
3047 NL_SET_ERR_MSG(extack, "No device specified");
/* neigh_lookup() takes a reference; released after the reply below. */
3051 neigh = neigh_lookup(tbl, dst, dev);
3053 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3057 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
3060 neigh_release(neigh);
/* Invoke @cb(@n, @cookie) on every neighbour in @tbl.  The table read
 * lock is held only to prevent hash resizes while chains are walked via
 * RCU-bh; @cb therefore must not sleep or re-take tbl->lock.
 */
3065 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3068 struct neigh_hash_table *nht;
3071 nht = rcu_dereference_bh(tbl->nht);
3073 read_lock(&tbl->lock); /* avoid resizes */
3074 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3075 struct neighbour *n;
3077 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
3079 n = rcu_dereference_bh(n->next))
3082 read_unlock(&tbl->lock);
3083 rcu_read_unlock_bh();
3085 EXPORT_SYMBOL(neigh_for_each);
3087 /* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every hash chain; for each entry where @cb returns nonzero
 * (presumably "release this one" — confirm against callers), unlink it
 * from the chain under n->lock and hand it to
 * neigh_cleanup_and_release().
 */
3088 void __neigh_for_each_release(struct neigh_table *tbl,
3089 int (*cb)(struct neighbour *))
3092 struct neigh_hash_table *nht;
/* Writer-held tbl->lock justifies plain (non-RCU-reader) dereferences. */
3094 nht = rcu_dereference_protected(tbl->nht,
3095 lockdep_is_held(&tbl->lock));
3096 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3097 struct neighbour *n;
3098 struct neighbour __rcu **np;
3100 np = &nht->hash_buckets[chain];
3101 while ((n = rcu_dereference_protected(*np,
3102 lockdep_is_held(&tbl->lock))) != NULL) {
3105 write_lock(&n->lock);
/* Unlink n: point the predecessor slot at n->next. */
3108 rcu_assign_pointer(*np,
3109 rcu_dereference_protected(n->next,
3110 lockdep_is_held(&tbl->lock)));
3114 write_unlock(&n->lock);
3116 neigh_cleanup_and_release(n);
3120 EXPORT_SYMBOL(__neigh_for_each_release);
/* Transmit @skb via the neighbour table selected by @index.
 * For address-resolving tables the neighbour is looked up (created on
 * demand via __neigh_create()) and its ->output() used; for
 * NEIGH_LINK_TABLE the link-layer header is built directly and the skb
 * queued with dev_queue_xmit().  Returns -EAFNOSUPPORT for unknown
 * indices.
 */
3122 int neigh_xmit(int index, struct net_device *dev,
3123 const void *addr, struct sk_buff *skb)
3125 int err = -EAFNOSUPPORT;
3126 if (likely(index < NEIGH_NR_TABLES)) {
3127 struct neigh_table *tbl;
3128 struct neighbour *neigh;
3130 tbl = neigh_tables[index];
/* ARP fast path: key is the raw IPv4 address. */
3134 if (index == NEIGH_ARP_TABLE) {
3135 u32 key = *((u32 *)addr);
3137 neigh = __ipv4_neigh_lookup_noref(dev, key);
3139 neigh = __neigh_lookup_noref(tbl, addr, dev);
3142 neigh = __neigh_create(tbl, addr, dev, false);
3143 err = PTR_ERR(neigh);
3144 if (IS_ERR(neigh)) {
3145 rcu_read_unlock_bh();
3148 err = neigh->output(neigh, skb);
3149 rcu_read_unlock_bh();
3151 else if (index == NEIGH_LINK_TABLE) {
3152 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3153 addr, NULL, skb->len);
3156 err = dev_queue_xmit(skb);
3164 EXPORT_SYMBOL(neigh_xmit);
3166 #ifdef CONFIG_PROC_FS
/* /proc seq_file helper: return the first neighbour visible to this
 * iteration — first bucket entry in the caller's netns that passes the
 * sub-iterator / NUD state filters.  Saves the bucket in state->bucket.
 */
3168 static struct neighbour *neigh_get_first(struct seq_file *seq)
3170 struct neigh_seq_state *state = seq->private;
3171 struct net *net = seq_file_net(seq);
3172 struct neigh_hash_table *nht = state->nht;
3173 struct neighbour *n = NULL;
3176 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3177 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3178 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3181 if (!net_eq(dev_net(n->dev), net))
3183 if (state->neigh_sub_iter) {
/* fakep: a throwaway loff_t for the protocol sub-iterator. */
3187 v = state->neigh_sub_iter(state, n, &fakep);
3191 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
/* Skip pure-NOARP entries when the table asked for it. */
3193 if (n->nud_state & ~NUD_NOARP)
3196 n = rcu_dereference_bh(n->next);
3202 state->bucket = bucket;
/* /proc seq_file helper: advance from @n to the next matching neighbour,
 * continuing into later buckets when the current chain is exhausted.
 * Applies the same netns / sub-iterator / NUD filters as
 * neigh_get_first().
 */
3207 static struct neighbour *neigh_get_next(struct seq_file *seq,
3208 struct neighbour *n,
3211 struct neigh_seq_state *state = seq->private;
3212 struct net *net = seq_file_net(seq);
3213 struct neigh_hash_table *nht = state->nht;
3215 if (state->neigh_sub_iter) {
3216 void *v = state->neigh_sub_iter(state, n, pos);
3220 n = rcu_dereference_bh(n->next);
3224 if (!net_eq(dev_net(n->dev), net))
3226 if (state->neigh_sub_iter) {
3227 void *v = state->neigh_sub_iter(state, n, pos);
3232 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3235 if (n->nud_state & ~NUD_NOARP)
3238 n = rcu_dereference_bh(n->next);
/* Chain exhausted: move on to the next hash bucket. */
3244 if (++state->bucket >= (1 << nht->hash_shift))
3247 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
/* Return the neighbour at position *pos, walking forward from the first
 * entry; NULL when *pos is beyond the end.
 */
3255 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3257 struct neighbour *n = neigh_get_first(seq);
3262 n = neigh_get_next(seq, n, pos);
3267 return *pos ? NULL : n;
/* /proc seq_file helper: first proxy-neighbour entry in this netns.
 * Also flags the iteration as being in the pneigh phase
 * (NEIGH_SEQ_IS_PNEIGH) so neigh_seq_next() dispatches correctly.
 */
3270 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3272 struct neigh_seq_state *state = seq->private;
3273 struct net *net = seq_file_net(seq);
3274 struct neigh_table *tbl = state->tbl;
3275 struct pneigh_entry *pn = NULL;
3278 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3279 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3280 pn = tbl->phash_buckets[bucket];
3281 while (pn && !net_eq(pneigh_net(pn), net))
3286 state->bucket = bucket;
/* /proc seq_file helper: advance @pn to the next proxy-neighbour in this
 * netns, crossing into later phash buckets as needed.
 */
3291 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3292 struct pneigh_entry *pn,
3295 struct neigh_seq_state *state = seq->private;
3296 struct net *net = seq_file_net(seq);
3297 struct neigh_table *tbl = state->tbl;
3301 } while (pn && !net_eq(pneigh_net(pn), net));
3304 if (++state->bucket > PNEIGH_HASHMASK)
3306 pn = tbl->phash_buckets[state->bucket];
3307 while (pn && !net_eq(pneigh_net(pn), net))
/* Return the proxy-neighbour at position *pos; mirrors neigh_get_idx(). */
3319 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3321 struct pneigh_entry *pn = pneigh_get_first(seq);
3326 pn = pneigh_get_next(seq, pn, pos);
3331 return *pos ? NULL : pn;
/* Position lookup spanning both phases: neighbours first, then (unless
 * NEIGH_SEQ_NEIGH_ONLY) proxy entries, with *pos decremented as we go.
 */
3334 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3336 struct neigh_seq_state *state = seq->private;
3338 loff_t idxpos = *pos;
3340 rc = neigh_get_idx(seq, &idxpos);
3341 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3342 rc = pneigh_get_idx(seq, &idxpos);
/* seq_file ->start for protocol /proc files (e.g. arp): snapshot the
 * hash table under RCU-bh and take tbl->lock for the whole iteration;
 * both are dropped in neigh_seq_stop().
 */
3347 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3348 __acquires(tbl->lock)
3351 struct neigh_seq_state *state = seq->private;
3355 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3358 state->nht = rcu_dereference_bh(tbl->nht);
3359 read_lock(&tbl->lock);
/* *pos == 0 yields the header token before any entries. */
3361 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3363 EXPORT_SYMBOL(neigh_seq_start);
/* seq_file ->next: dispatch on the current phase — neighbour entries
 * first, then fall through to the pneigh phase once exhausted.
 */
3365 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3367 struct neigh_seq_state *state;
3370 if (v == SEQ_START_TOKEN) {
3371 rc = neigh_get_first(seq);
3375 state = seq->private;
3376 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3377 rc = neigh_get_next(seq, v, NULL);
3380 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3381 rc = pneigh_get_first(seq);
/* Already in the pneigh phase: must not be a NEIGH_ONLY iteration. */
3383 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3384 rc = pneigh_get_next(seq, v, NULL);
3390 EXPORT_SYMBOL(neigh_seq_next);
/* seq_file ->stop: release the locks taken in neigh_seq_start(). */
3392 void neigh_seq_stop(struct seq_file *seq, void *v)
3393 __releases(tbl->lock)
3396 struct neigh_seq_state *state = seq->private;
3397 struct neigh_table *tbl = state->tbl;
3399 read_unlock(&tbl->lock);
3400 rcu_read_unlock_bh();
3402 EXPORT_SYMBOL(neigh_seq_stop);
3404 /* statistics via seq_file */
/* Stats seq_file ->start: *pos 0 yields the header token; positions
 * 1..N map to possible CPUs (impossible CPU ids are skipped), returning
 * that CPU's per-cpu statistics block.
 */
3406 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3408 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3412 return SEQ_START_TOKEN;
3414 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3415 if (!cpu_possible(cpu))
3418 return per_cpu_ptr(tbl->stats, cpu);
/* Stats seq_file ->next: step to the next possible CPU's stats block. */
3423 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3425 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3428 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3429 if (!cpu_possible(cpu))
3432 return per_cpu_ptr(tbl->stats, cpu);
/* Stats seq_file ->stop: nothing to release for the per-cpu walk. */
3438 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
/* Stats seq_file ->show: print the column header for the start token,
 * otherwise one row of this CPU's counters (plus the global entry
 * count).
 */
3443 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3445 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3446 struct neigh_statistics *st = v;
3448 if (v == SEQ_START_TOKEN) {
3449 seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3453 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3454 "%08lx %08lx %08lx "
3455 "%08lx %08lx %08lx\n",
/* "entries" is table-global, not per-cpu, hence atomic_read here. */
3456 atomic_read(&tbl->entries),
3467 st->rcv_probes_mcast,
3468 st->rcv_probes_ucast,
3470 st->periodic_gc_runs,
/* seq_operations for the per-table /proc statistics file. */
3479 static const struct seq_operations neigh_stat_seq_ops = {
3480 .start = neigh_stat_seq_start,
3481 .next = neigh_stat_seq_next,
3482 .stop = neigh_stat_seq_stop,
3483 .show = neigh_stat_seq_show,
3485 #endif /* CONFIG_PROC_FS */
/* Broadcast a neighbour event of @type to the RTNLGRP_NEIGH multicast
 * group.  GFP_ATOMIC throughout — callable from timer/softirq context.
 * On failure, the error is reported to listeners via rtnl_set_sk_err().
 */
3487 static void __neigh_notify(struct neighbour *n, int type, int flags,
3490 struct net *net = dev_net(n->dev);
3491 struct sk_buff *skb;
3494 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3498 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3500 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3501 WARN_ON(err == -EMSGSIZE);
3505 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3509 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
/* Ask userspace resolvers for help: emit an RTM_GETNEIGH request
 * notification for @n (used with app_probes / NTF_USE style resolution).
 */
3512 void neigh_app_ns(struct neighbour *n)
3514 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3516 EXPORT_SYMBOL(neigh_app_ns);
3518 #ifdef CONFIG_SYSCTL
/* Cap for legacy "unres_qlen": chosen so qlen * SKB_TRUESIZE(ETH_FRAME_LEN)
 * cannot overflow the int backing store. */
3519 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
/* Sysctl handler bridging legacy packet-count "unres_qlen" to the
 * byte-based QUEUE_LEN_BYTES storage: converts bytes->packets for
 * reads and packets->bytes on writes, clamped to [0, unres_qlen_max].
 */
3521 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3522 void *buffer, size_t *lenp, loff_t *ppos)
3525 struct ctl_table tmp = *ctl;
3527 tmp.extra1 = SYSCTL_ZERO;
3528 tmp.extra2 = &unres_qlen_max;
3531 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3532 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3535 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
/* Return @dev's per-device neigh_parms for the given family (ARP for
 * IPv4, ND for IPv6).  Caller must be in an RCU read-side section.
 */
3539 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3544 return __in_dev_arp_parms_get_rcu(dev);
3546 return __in6_dev_nd_parms_get_rcu(dev);
/* Propagate a changed default parms value to every device in @net whose
 * per-device copy has not been explicitly set (data_state bit clear).
 */
3551 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3554 struct net_device *dev;
3555 int family = neigh_parms_family(p);
3558 for_each_netdev_rcu(net, dev) {
3559 struct neigh_parms *dst_p =
3560 neigh_get_dev_parms_rcu(dev, family);
/* Only overwrite values the admin never touched on this device. */
3562 if (dst_p && !test_bit(index, dst_p->data_state))
3563 dst_p->data[index] = p->data[index];
/* Common post-write hook for all neigh sysctl handlers: mark the value
 * as explicitly set, fire the DELAY_PROBE_TIME netevent when relevant,
 * and — for the "default" table (no device) — copy the new value to all
 * devices that still track the default.
 */
3568 static void neigh_proc_update(struct ctl_table *ctl, int write)
3570 struct net_device *dev = ctl->extra1;
3571 struct neigh_parms *p = ctl->extra2;
3572 struct net *net = neigh_parms_net(p);
/* Recover the NEIGH_VAR index from the data pointer's offset into p->data. */
3573 int index = (int *) ctl->data - p->data;
3578 set_bit(index, p->data_state);
3579 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3580 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3581 if (!dev) /* NULL dev means this is default value */
3582 neigh_copy_dflt_parms(net, p, index);
/* proc_dointvec_minmax clamped to [0, INT_MAX], then the common
 * neigh_proc_update() post-processing.
 */
3585 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3586 void *buffer, size_t *lenp,
3589 struct ctl_table tmp = *ctl;
3592 tmp.extra1 = SYSCTL_ZERO;
3593 tmp.extra2 = SYSCTL_INT_MAX;
3595 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3596 neigh_proc_update(ctl, write);
/* ms->jiffies sysctl handler with a strictly positive lower bound
 * (at least one jiffy), plus the common neigh_proc_update() hook.
 */
3600 static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write,
3601 void *buffer, size_t *lenp, loff_t *ppos)
3603 struct ctl_table tmp = *ctl;
3606 int min = msecs_to_jiffies(1);
3611 ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3612 neigh_proc_update(ctl, write);
/* Plain proc_dointvec wrapper adding the neigh_proc_update() hook. */
3616 int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
3617 size_t *lenp, loff_t *ppos)
3619 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3621 neigh_proc_update(ctl, write);
3624 EXPORT_SYMBOL(neigh_proc_dointvec);
/* Seconds->jiffies wrapper adding the neigh_proc_update() hook. */
3626 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
3627 size_t *lenp, loff_t *ppos)
3629 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3631 neigh_proc_update(ctl, write);
3634 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
/* USER_HZ->jiffies wrapper adding the neigh_proc_update() hook. */
3636 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3637 void *buffer, size_t *lenp,
3640 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3642 neigh_proc_update(ctl, write);
/* ms->jiffies wrapper adding the neigh_proc_update() hook. */
3646 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3647 void *buffer, size_t *lenp, loff_t *ppos)
3649 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3651 neigh_proc_update(ctl, write);
3654 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
/* Legacy unres_qlen handler (packet units, see proc_unres_qlen()) plus
 * the common neigh_proc_update() hook.
 */
3656 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3657 void *buffer, size_t *lenp,
3660 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3662 neigh_proc_update(ctl, write);
/* Handler for base_reachable_time(_ms): dispatch on the sysctl name to
 * the seconds or milliseconds variant, then immediately re-randomize
 * p->reachable_time so the new base takes effect without waiting for
 * neigh_periodic_work.
 */
3666 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3667 void *buffer, size_t *lenp,
3670 struct neigh_parms *p = ctl->extra2;
3673 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3674 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3675 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3676 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3680 if (write && ret == 0) {
3681 /* update reachable_time as well, otherwise, the change will
3682 * only be effective after the next time neigh_periodic_work
3683 * decides to recompute it
3686 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* Helper macros to build neigh_vars[] ctl_table entries.  The .data
 * field initially holds the OFFSET of the variable within
 * struct neigh_parms (via the null-pointer arithmetic below);
 * neigh_sysctl_register() later adds the actual parms pointer to turn
 * it into a real address.  The *_REUSED_ENTRY variants expose one
 * backing variable (data_attr) under an additional legacy name/unit.
 */
3691 #define NEIGH_PARMS_DATA_OFFSET(index) \
3692 (&((struct neigh_parms *) 0)->data[index])
3694 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3695 [NEIGH_VAR_ ## attr] = { \
3697 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3698 .maxlen = sizeof(int), \
3700 .proc_handler = proc, \
3703 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3704 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3706 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3707 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3709 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3710 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3712 #define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
3713 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
3715 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3716 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3718 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3719 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
/* Template sysctl table, kmemdup'd per registration in
 * neigh_sysctl_register().  Per-parms entries come first (data holds a
 * neigh_parms offset until fixed up); the trailing GC_* entries are
 * table-global and are either pointed at the table's gc_* fields (for
 * the "default" registration) or zeroed out to terminate the table
 * early (for per-device registrations).
 */
3721 static struct neigh_sysctl_table {
3722 struct ctl_table_header *sysctl_header;
3723 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3724 } neigh_sysctl_template __read_mostly = {
3726 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3727 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3728 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3729 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3730 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3731 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3732 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3733 NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
3734 "interval_probe_time_ms"),
3735 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3736 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3737 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3738 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3739 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3740 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
/* Legacy aliases sharing storage with their canonical entries above. */
3741 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3742 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3743 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3744 [NEIGH_VAR_GC_INTERVAL] = {
3745 .procname = "gc_interval",
3746 .maxlen = sizeof(int),
3748 .proc_handler = proc_dointvec_jiffies,
3750 [NEIGH_VAR_GC_THRESH1] = {
3751 .procname = "gc_thresh1",
3752 .maxlen = sizeof(int),
3754 .extra1 = SYSCTL_ZERO,
3755 .extra2 = SYSCTL_INT_MAX,
3756 .proc_handler = proc_dointvec_minmax,
3758 [NEIGH_VAR_GC_THRESH2] = {
3759 .procname = "gc_thresh2",
3760 .maxlen = sizeof(int),
3762 .extra1 = SYSCTL_ZERO,
3763 .extra2 = SYSCTL_INT_MAX,
3764 .proc_handler = proc_dointvec_minmax,
3766 [NEIGH_VAR_GC_THRESH3] = {
3767 .procname = "gc_thresh3",
3768 .maxlen = sizeof(int),
3770 .extra1 = SYSCTL_ZERO,
3771 .extra2 = SYSCTL_INT_MAX,
3772 .proc_handler = proc_dointvec_minmax,
/* Register the per-parms sysctl tree under net/<proto>/neigh/<dev|default>.
 * Duplicates neigh_sysctl_template, fixes up .data pointers (template
 * stores offsets), installs the protocol-specific @handler on the time
 * entries, and registers the table in @p's netns.  Returns 0 or a
 * negative errno (error paths are outside the visible excerpt).
 */
3778 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3779 proc_handler *handler)
3782 struct neigh_sysctl_table *t;
3783 const char *dev_name_source;
3784 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3787 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
/* Turn template offsets into real addresses inside *p, and stash
 * dev/parms for neigh_proc_update() via extra1/extra2. */
3791 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3792 t->neigh_vars[i].data += (long) p;
3793 t->neigh_vars[i].extra1 = dev;
3794 t->neigh_vars[i].extra2 = p;
3798 dev_name_source = dev->name;
3799 /* Terminate the table early */
3800 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3801 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
/* Default (no-device) registration also exposes the table-global GC knobs. */
3803 struct neigh_table *tbl = p->tbl;
3804 dev_name_source = "default";
3805 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3806 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3807 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3808 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
/* Protocol-supplied handler overrides the generic time-value handlers. */
3813 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3815 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3816 /* RetransTime (in milliseconds)*/
3817 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3818 /* ReachableTime (in milliseconds) */
3819 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3821 /* Those handlers will update p->reachable_time after
3822 * base_reachable_time(_ms) is set to ensure the new timer starts being
3823 * applied after the next neighbour update instead of waiting for
3824 * neigh_periodic_work to update its value (can be multiple minutes)
3825 * So any handler that replaces them should do this as well
3828 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3829 neigh_proc_base_reachable_time;
3830 /* ReachableTime (in milliseconds) */
3831 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3832 neigh_proc_base_reachable_time;
3835 switch (neigh_parms_family(p)) {
3846 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3847 p_name, dev_name_source);
3849 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3850 if (!t->sysctl_header)
3853 p->sysctl_table = t;
3861 EXPORT_SYMBOL(neigh_sysctl_register);
/* Tear down what neigh_sysctl_register() set up: detach the table from
 * @p first so concurrent readers can't see it mid-teardown, then
 * unregister (the kfree of @t is outside the visible excerpt).
 */
3863 void neigh_sysctl_unregister(struct neigh_parms *p)
3865 if (p->sysctl_table) {
3866 struct neigh_sysctl_table *t = p->sysctl_table;
3867 p->sysctl_table = NULL;
3868 unregister_net_sysctl_table(t->sysctl_header);
3872 EXPORT_SYMBOL(neigh_sysctl_unregister);
3874 #endif /* CONFIG_SYSCTL */
/* Subsystem init: register the rtnetlink message handlers for neighbour
 * add/delete/get-dump and neighbour-table dump/set.
 */
3876 static int __init neigh_init(void)
3878 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3879 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3880 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3882 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3884 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3889 subsys_initcall(neigh_init);