net/core/neighbour.c — generic address resolution entity (neighbour cache)
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
43
44 #define DEBUG
45 #define NEIGH_DEBUG 1
46 #define neigh_dbg(level, fmt, ...)              \
47 do {                                            \
48         if (level <= NEIGH_DEBUG)               \
49                 pr_debug(fmt, ##__VA_ARGS__);   \
50 } while (0)
51
52 #define PNEIGH_HASHMASK         0xF
53
54 static void neigh_timer_handler(unsigned long arg);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
56                            u32 pid);
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
59
60 #ifdef CONFIG_PROC_FS
61 static const struct file_operations neigh_stat_seq_fops;
62 #endif
63
64 /*
65    Neighbour hash table buckets are protected with rwlock tbl->lock.
66
67    - All the scans/updates to hash buckets MUST be made under this lock.
68    - NOTHING clever should be made under this lock: no callbacks
69      to protocol backends, no attempts to send something to network.
70      It will result in deadlocks, if backend/driver wants to use neighbour
71      cache.
72    - If the entry requires some non-trivial actions, increase
73      its reference count and release table lock.
74
75    Neighbour entries are protected:
76    - with reference count.
77    - with rwlock neigh->lock
78
79    Reference count prevents destruction.
80
81    neigh->lock mainly serializes ll address data and its validity state.
82    However, the same lock is used to protect another entry fields:
83     - timer
84     - resolution queue
85
86    Again, nothing clever shall be made under neigh->lock,
87    the most complicated procedure, which we allow is dev->hard_header.
88    It is supposed, that dev->hard_header is simplistic and does
89    not make callbacks to neighbour tables.
90  */
91
/* Output method installed on dead or unresolvable entries: drop the
 * packet and report the path as down.  Installed from neigh_alloc()
 * and neigh_flush_dev().
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
97
/* Final teardown for an entry that has been unlinked from the table:
 * run the optional per-parms cleanup hook, broadcast RTM_DELNEIGH and a
 * netevent, then drop the table's reference (which may free the entry
 * via neigh_destroy()).
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
107
/*
 * Return a pseudo-random reachable time uniformly distributed in the
 * interval (1/2)*base ... (3/2)*base, or 0 when base is 0.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
119
120
/* Synchronous emergency shrink, run from neigh_alloc() when the entry
 * count crosses gc_thresh2/gc_thresh3.  Walks every bucket under the
 * table write lock and unlinks entries that are unreferenced
 * (refcnt == 1, i.e. only the table holds them) and not NUD_PERMANENT.
 *
 * Returns 1 if at least one entry was released, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				/* mark dead while n->lock is still held so
				 * other holders see the entry going away */
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
166
167 static void neigh_add_timer(struct neighbour *n, unsigned long when)
168 {
169         neigh_hold(n);
170         if (unlikely(mod_timer(&n->timer, when))) {
171                 printk("NEIGH: BUG, double timer add, state is %x\n",
172                        n->nud_state);
173                 dump_stack();
174         }
175 }
176
177 static int neigh_del_timer(struct neighbour *n)
178 {
179         if ((n->nud_state & NUD_IN_TIMER) &&
180             del_timer(&n->timer)) {
181                 neigh_release(n);
182                 return 1;
183         }
184         return 0;
185 }
186
187 static void pneigh_queue_purge(struct sk_buff_head *list)
188 {
189         struct sk_buff *skb;
190
191         while ((skb = skb_dequeue(list)) != NULL) {
192                 dev_put(skb->dev);
193                 kfree_skb(skb);
194         }
195 }
196
/* Unlink and release every entry whose device matches @dev (all entries
 * when @dev is NULL).  Caller must hold tbl->lock for writing.
 *
 * Entries still referenced elsewhere cannot be freed here; they are
 * neutralised instead: queued packets are purged, output is redirected
 * to neigh_blackhole() and the NUD state is downgraded, so remaining
 * holders can only drop packets until they release the entry.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
246
/* Flush all cached entries for @dev from @tbl, e.g. after a link-layer
 * address change.  Proxy entries and the proxy queue are left alone;
 * contrast neigh_ifdown().
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
254
/* Full per-device teardown: flush both cached and proxy entries for
 * @dev, stop the proxy timer and free all packets queued for proxying.
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
267
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 *
 * Forced GC runs first when the table is at/over gc_thresh3, or at/over
 * gc_thresh2 with no flush in the last 5 seconds; the allocation is
 * refused ("table overflow") if GC frees nothing and we are still at
 * gc_thresh3.
 *
 * The entry is returned with refcnt 1, state NUD_NONE and dead = 1: it
 * is NOT hashed yet — __neigh_create() finishes the insertion.  Returns
 * NULL on failure (entry count is rolled back).
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

	/* entry_size covers the protocol key; drivers may need extra room */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
312
/* Initialise one hash salt with random bits; bit 0 is forced on so the
 * salt is never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}
318
/* Allocate a hash table with 2^shift buckets.  Bucket arrays up to one
 * page come from kzalloc(); larger arrays come straight from the page
 * allocator.  The per-table hash salts are randomised here.  Returns
 * NULL on allocation failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
345
/* RCU callback counterpart of neigh_hash_alloc(): free the bucket array
 * through the allocator that produced it (kzalloc vs page allocator,
 * decided by the same size threshold), then free the table itself.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
360
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry into it.  Caller must hold tbl->lock for writing.  RCU readers
 * may keep traversing the old table until it is reclaimed via
 * call_rcu().  On allocation failure the old table is kept and
 * returned unchanged.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			/* hash functions return a full 32-bit value; keep
			 * only the top hash_shift bits as the bucket index */
			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
401
/* Look up the entry for @pkey/@dev in @tbl and return it with an extra
 * reference, or NULL if absent.  Runs locklessly under
 * rcu_read_lock_bh(); atomic_inc_not_zero() guards against an entry
 * whose last reference is being dropped concurrently (in that case
 * NULL is returned, though the hits counter is still bumped).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!atomic_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
421
/* Like neigh_lookup(), but match on key and network namespace only,
 * ignoring the device.  The first matching entry in the bucket wins.
 * Returns the entry with an extra reference, or NULL.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	/* device is NULL: hash on the key alone */
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
452
/* Create (or find) the hashed entry for @pkey/@dev in @tbl.
 *
 * Allocates via neigh_alloc(), then runs three setup stages before
 * inserting under tbl->lock: the protocol constructor
 * (tbl->constructor), the driver hook (ndo_neigh_construct) and the
 * per-parms neigh_setup.  If another CPU inserted the same key in the
 * meantime, the existing entry is returned and the new one released.
 *
 * @want_ref: take an extra reference on the returned entry.
 * Returns an ERR_PTR() on failure (-ENOBUFS, constructor error, or
 * -EINVAL if the parms block died under us).
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* start unconfirmed: pretend the last confirmation happened
	 * 2 * BASE_REACHABLE_TIME ago */
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* keep average bucket occupancy at or below one entry */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* re-check for a concurrent insert of the same key */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
540
541 static u32 pneigh_hash(const void *pkey, int key_len)
542 {
543         u32 hash_val = *(u32 *)(pkey + key_len - 4);
544         hash_val ^= (hash_val >> 16);
545         hash_val ^= hash_val >> 8;
546         hash_val ^= hash_val >> 4;
547         hash_val &= PNEIGH_HASHMASK;
548         return hash_val;
549 }
550
551 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
552                                               struct net *net,
553                                               const void *pkey,
554                                               int key_len,
555                                               struct net_device *dev)
556 {
557         while (n) {
558                 if (!memcmp(n->key, pkey, key_len) &&
559                     net_eq(pneigh_net(n), net) &&
560                     (n->dev == dev || !n->dev))
561                         return n;
562                 n = n->next;
563         }
564         return NULL;
565 }
566
/* Lockless proxy-entry lookup in a single bucket.  The caller is
 * responsible for serialising against writers — NOTE(review): callers
 * appear to rely on RTNL or tbl->lock; confirm at the call sites
 * outside this file.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
577
/* Find a proxy-neighbour entry; when @creat is set and no entry exists,
 * allocate and insert one (requires RTNL held, may sleep in
 * kmalloc(GFP_KERNEL)).  Returns the entry, or NULL on a miss
 * (creat == 0) or on allocation/constructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	/* creation path is serialised by RTNL */
	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
622
623
/* Remove and free the proxy entry matching @pkey/@dev/@net exactly
 * (no device wildcard here, unlike lookup).  The destructor runs after
 * tbl->lock is dropped.  Returns 0 on success, -ENOENT if not found.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
649
/* Drop every proxy entry bound to @dev (all entries when @dev is NULL).
 * Caller holds tbl->lock for writing (see neigh_ifdown()).
 * Unconditionally returns -ENOENT; the caller ignores the value.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
672
673 static void neigh_parms_destroy(struct neigh_parms *parms);
674
/* Drop one reference on @parms; the final put frees it via
 * neigh_parms_destroy().
 */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
680
/*
 *	Final destructor, invoked when the last reference is dropped.
 *	The neighbour must already be out of the table (neigh->dead set);
 *	destroying a live entry is a bug and is only logged, not done.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* a pending timer would hold a reference, so this can't happen */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	/* struct is freed after an RCU grace period: lockless readers may
	 * still be walking the hash chain through us */
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
717
/* Neighbour state is suspicious;
   disable fast path.

   Switches n->output to the generic (resolving) output method so every
   packet re-validates the entry.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}
729
/* Neighbour state is OK;
   enable fast path.

   Switches n->output to the connected output method, which skips
   re-validation on every packet.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
741
/* Periodic GC worker.  Walks the whole hash table dropping entries that
 * are unreferenced (refcnt == 1) and either NUD_FAILED or unused for
 * longer than GC_STALETIME; permanent entries and entries with a
 * pending timer are skipped.  Every 300 s it also re-randomises
 * reachable_time for all parms blocks.  The table lock is dropped
 * between buckets so the walk never monopolises a CPU, and the work is
 * re-queued every BASE_REACHABLE_TIME/2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	/* below gc_thresh1 the table is small enough to leave alone */
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
822
823 static __inline__ int neigh_max_probes(struct neighbour *n)
824 {
825         struct neigh_parms *p = n->parms;
826         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
827                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
828                 NEIGH_VAR(p, MCAST_PROBES));
829 }
830
/* Resolution failed: flush the pending arp_queue through
 * ops->error_report(), then discard whatever remains.  neigh->lock is
 * dropped around each callback because error_report may re-enter the
 * neighbour code (see comment below); the NUD_FAILED re-check on each
 * iteration bounds the loop if the state changes meanwhile.
 * Called with neigh->lock write-held.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
855
/* Send one solicitation for @neigh and bump its probe counter.  A clone
 * of the most recently queued skb is handed to ops->solicit() so the
 * original survives even if arp_queue is trimmed meanwhile.
 * Called with neigh->lock write-held; drops the lock before soliciting.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
869
/* Called when a timer expires for a neighbour entry. */

/* Drive the NUD state machine for one entry:
 *   REACHABLE -> (still confirmed) stay / -> DELAY (recently used)
 *             -> STALE (idle past the confirmation window)
 *   DELAY     -> REACHABLE (confirmed meanwhile) / -> PROBE
 *   PROBE|INCOMPLETE -> retransmit until neigh_max_probes(), then FAILED.
 * Re-arms the timer for states that remain in NUD_IN_TIMER and sends a
 * probe when the entry is INCOMPLETE or PROBE.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* Timer may race with a state change that left the timer states. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Recently used but unconfirmed: give the peer a
			 * grace period before actively probing.
			 */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Confirmation arrived during the delay window. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	/* Out of probe budget: the neighbour is unresolvable for now. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp to at least HZ/2 in the future to avoid a storm
		 * of immediate re-fires.
		 */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	/* Drop the reference taken when this timer run was armed. */
	neigh_release(neigh);
}
957
/* Kick resolution for @neigh on behalf of a packet @skb (may be NULL).
 *
 * Returns 0 when the caller may transmit immediately (entry usable),
 * 1 when the skb was consumed -- either queued on arp_queue awaiting
 * resolution, or freed because resolution cannot be attempted.
 * Starts an INCOMPLETE probe cycle or a STALE->DELAY transition as
 * needed.  Takes and releases neigh->lock (bh-disabled) internally.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Already resolved or being verified: nothing to do. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* Pre-charge the counter with the unicast budget so
			 * only multicast/app probes are sent from this state.
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing configured at all: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Make room by dropping the oldest queued packets
			 * until the new one fits under QUEUE_LEN_BYTES.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops neigh->lock itself; either way BHs are
	 * re-enabled below, pairing with the write_lock_bh() above.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1035
1036 static void neigh_update_hhs(struct neighbour *neigh)
1037 {
1038         struct hh_cache *hh;
1039         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1040                 = NULL;
1041
1042         if (neigh->dev->header_ops)
1043                 update = neigh->dev->header_ops->cache_update;
1044
1045         if (update) {
1046                 hh = &neigh->hh;
1047                 if (hh->hh_len) {
1048                         write_seqlock_bh(&hh->hh_lock);
1049                         update(hh, neigh->dev, neigh->ha);
1050                         write_sequnlock_bh(&hh->hh_lock);
1051                 }
1052         }
1053 }
1054
1055
1056
1057 /* Generic update routine.
1058    -- lladdr is new lladdr or NULL, if it is not supplied.
1059    -- new    is new state.
1060    -- flags
1061         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1062                                 if it is different.
1063         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1064                                 lladdr instead of overriding it
1065                                 if it is different.
1066         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1067
1068         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1069                                 NTF_ROUTER flag.
1070         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1071                                 a router.
1072
1073    Caller MUST hold reference count on the entry.
1074  */
1075
/* Apply an lladdr/state update to @neigh (see the comment block above
 * for flag semantics).  Returns 0 on success, -EPERM when a
 * non-admin update targets a NOARP/PERMANENT entry (or the entry is
 * dead), -EINVAL when no address is supplied and none is cached.
 * Sends a netlink/netevent notification when anything user-visible
 * changed.  Caller must hold a reference on the entry.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch static entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	/* Transition to an invalid state (NONE/INCOMPLETE/FAILED...):
	 * stop the timer, demote and flush if resolution just failed.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached address but mark it
				 * suspicious instead of overriding.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate 'confirmed' so the unconfirmed new address is
		 * revalidated promptly.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	/* Entry just became valid: flush packets that queued up while it
	 * was unresolved.
	 */
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1239
1240 /* Update the neigh to listen temporarily for probe responses, even if it is
1241  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1242  */
1243 void __neigh_set_probe_once(struct neighbour *neigh)
1244 {
1245         if (neigh->dead)
1246                 return;
1247         neigh->updated = jiffies;
1248         if (!(neigh->nud_state & NUD_FAILED))
1249                 return;
1250         neigh->nud_state = NUD_INCOMPLETE;
1251         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1252         neigh_add_timer(neigh,
1253                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1254 }
1255 EXPORT_SYMBOL(__neigh_set_probe_once);
1256
1257 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1258                                  u8 *lladdr, void *saddr,
1259                                  struct net_device *dev)
1260 {
1261         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1262                                                  lladdr || !dev->addr_len);
1263         if (neigh)
1264                 neigh_update(neigh, lladdr, NUD_STALE,
1265                              NEIGH_UPDATE_F_OVERRIDE, 0);
1266         return neigh;
1267 }
1268 EXPORT_SYMBOL(neigh_event_ns);
1269
/* Initialize the cached hardware header for @n exactly once.
 * NOTE(review): an older comment here claimed "called with
 * read_lock_bh(&n->lock)", but the function takes the write lock
 * itself -- confirm against callers (neigh_resolve_output).
 */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1287
1288 /* Slow and careful. */
1289
/* Output path for entries that may still need resolution.  If
 * neigh_event_send() says the entry is usable, build the link-layer
 * header (initializing the hh cache on first use) and transmit;
 * otherwise the skb was queued or dropped by neigh_event_send().
 * Returns the dev_queue_xmit() result, 0 when the skb was taken over
 * by resolution, or -EINVAL when header construction failed.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		/* Retry under the ha seqlock so a concurrent address
		 * change cannot leave a torn header in the skb.
		 */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1322
1323 /* As fast as possible without hh cache */
1324
1325 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1326 {
1327         struct net_device *dev = neigh->dev;
1328         unsigned int seq;
1329         int err;
1330
1331         do {
1332                 __skb_pull(skb, skb_network_offset(skb));
1333                 seq = read_seqbegin(&neigh->ha_lock);
1334                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1335                                       neigh->ha, NULL, skb->len);
1336         } while (read_seqretry(&neigh->ha_lock, seq));
1337
1338         if (err >= 0)
1339                 err = dev_queue_xmit(skb);
1340         else {
1341                 err = -EINVAL;
1342                 kfree_skb(skb);
1343         }
1344         return err;
1345 }
1346 EXPORT_SYMBOL(neigh_connected_output);
1347
/* Output path for devices that need no neighbour resolution at all:
 * hand the skb straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1353
/* Proxy-queue timer callback: replay (via tbl->proxy_redo) every
 * queued skb whose scheduled time has arrived, drop the rest-cases,
 * and re-arm the timer for the earliest entry still pending.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* sched_next was stored by pneigh_enqueue(); <= 0 means due. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the device reference taken at enqueue time. */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1387
1388 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1389                     struct sk_buff *skb)
1390 {
1391         unsigned long now = jiffies;
1392
1393         unsigned long sched_next = now + (prandom_u32() %
1394                                           NEIGH_VAR(p, PROXY_DELAY));
1395
1396         if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1397                 kfree_skb(skb);
1398                 return;
1399         }
1400
1401         NEIGH_CB(skb)->sched_next = sched_next;
1402         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1403
1404         spin_lock(&tbl->proxy_queue.lock);
1405         if (del_timer(&tbl->proxy_timer)) {
1406                 if (time_before(tbl->proxy_timer.expires, sched_next))
1407                         sched_next = tbl->proxy_timer.expires;
1408         }
1409         skb_dst_drop(skb);
1410         dev_hold(skb->dev);
1411         __skb_queue_tail(&tbl->proxy_queue, skb);
1412         mod_timer(&tbl->proxy_timer, sched_next);
1413         spin_unlock(&tbl->proxy_queue.lock);
1414 }
1415 EXPORT_SYMBOL(pneigh_enqueue);
1416
1417 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1418                                                       struct net *net, int ifindex)
1419 {
1420         struct neigh_parms *p;
1421
1422         list_for_each_entry(p, &tbl->parms_list, list) {
1423                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1424                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1425                         return p;
1426         }
1427
1428         return NULL;
1429 }
1430
/* Allocate per-device neighbour parameters for @dev, cloned from the
 * table defaults, and link them onto @tbl's parms list.  Takes a
 * reference on @dev (dropped in neigh_parms_release).  Returns NULL on
 * allocation failure or when the driver's ndo_neigh_setup rejects it.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* Start as a copy of the table-wide defaults. */
	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		/* Give the driver a chance to veto or adjust. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1464
/* RCU callback: drop the final list reference on a parms entry after
 * a grace period (queued by neigh_parms_release).
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1472
/* Unlink @parms from @tbl and schedule its reference drop after an RCU
 * grace period, so lockless readers still walking the list stay safe.
 * The table's own default parms are never released this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
1486
/* Final destructor, invoked when the parms refcount hits zero. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1491
1492 static struct lock_class_key neigh_table_proxy_queue_class;
1493
1494 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1495
/* Initialize a protocol's neighbour table (@index is NEIGH_ARP_TABLE,
 * NEIGH_ND_TABLE or NEIGH_DN_TABLE) and register it in neigh_tables[].
 * Called at boot/module init; allocation failures here are fatal
 * (panic) because the stack cannot operate without the table.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial hash: 2^3 buckets; grown on demand by the insert path. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	/* Periodic garbage collection; deferrable to save power. */
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1546
/* Tear down a neighbour table on protocol/module unload: unregister
 * it, stop GC work and the proxy timer, purge queues and entries, and
 * free the hashes, proc entry and statistics.  Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	/* Unpublish first so no new lookups find the table. */
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	/* Free the hash after a grace period; readers may still hold it. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1573
1574 static struct neigh_table *neigh_find_table(int family)
1575 {
1576         struct neigh_table *tbl = NULL;
1577
1578         switch (family) {
1579         case AF_INET:
1580                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1581                 break;
1582         case AF_INET6:
1583                 tbl = neigh_tables[NEIGH_ND_TABLE];
1584                 break;
1585         case AF_DECnet:
1586                 tbl = neigh_tables[NEIGH_DN_TABLE];
1587                 break;
1588         }
1589
1590         return tbl;
1591 }
1592
/* RTM_DELNEIGH handler: delete a neighbour (or proxy) entry described
 * by the netlink message.  A normal entry is "deleted" by forcing it
 * to NUD_FAILED via an administrative neigh_update().  Runs under RTNL.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	/* The destination address attribute is mandatory. */
	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < tbl->key_len)
		goto out;

	/* Proxy entries live in a separate (pneigh) table. */
	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN,
			   NETLINK_CB(skb).portid);
	neigh_release(neigh);

out:
	return err;
}
1650
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry.
 *
 * The request must carry NDA_DST (key; length validated against
 * tbl->key_len) and may carry NDA_LLADDR (length validated against
 * dev->addr_len).  NTF_PROXY requests are routed to the proxy table
 * (pneigh_lookup() with creat=1); otherwise the neighbour is looked up
 * and, if absent and NLM_F_CREATE is set, created.  NLM_F_EXCL makes an
 * existing entry an error; without NLM_F_REPLACE an existing entry's
 * lladdr is not overridden.  NTF_USE only kicks resolution instead of
 * performing an administrative update.
 *
 * Runs under RTNL.  Returns 0 on success or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *tb[NDA_MAX+1];
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        struct neighbour *neigh;
        void *dst, *lladdr;
        int err;

        ASSERT_RTNL();
        err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, NULL);
        if (err < 0)
                goto out;

        err = -EINVAL;
        if (tb[NDA_DST] == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }

                /* Undersized link-layer address attribute -> -EINVAL. */
                if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
                        goto out;
        }

        tbl = neigh_find_table(ndm->ndm_family);
        if (tbl == NULL)
                return -EAFNOSUPPORT;

        /* err is still -EINVAL here for a too-short key. */
        if (nla_len(tb[NDA_DST]) < tbl->key_len)
                goto out;
        dst = nla_data(tb[NDA_DST]);
        lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

        if (ndm->ndm_flags & NTF_PROXY) {
                struct pneigh_entry *pn;

                err = -ENOBUFS;
                /* creat=1: allocate the proxy entry if it does not exist. */
                pn = pneigh_lookup(tbl, net, dst, dev, 1);
                if (pn) {
                        pn->flags = ndm->ndm_flags;
                        err = 0;
                }
                goto out;
        }

        /* Non-proxy entries are always per-device. */
        if (dev == NULL)
                goto out;

        neigh = neigh_lookup(tbl, dst, dev);
        if (neigh == NULL) {
                if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
                        err = -ENOENT;
                        goto out;
                }

                neigh = __neigh_lookup_errno(tbl, dst, dev);
                if (IS_ERR(neigh)) {
                        err = PTR_ERR(neigh);
                        goto out;
                }
        } else {
                if (nlh->nlmsg_flags & NLM_F_EXCL) {
                        err = -EEXIST;
                        neigh_release(neigh);
                        goto out;
                }

                /* Without NLM_F_REPLACE, do not clobber an existing lladdr. */
                if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
                        flags &= ~NEIGH_UPDATE_F_OVERRIDE;
        }

        if (ndm->ndm_flags & NTF_USE) {
                /* Just trigger resolution; no state change requested. */
                neigh_event_send(neigh, NULL);
                err = 0;
        } else
                err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
                                   NETLINK_CB(skb).portid);
        neigh_release(neigh);

out:
        return err;
}
1742
/* Serialize one neigh_parms instance as a nested NDTA_PARMS attribute.
 *
 * Emits the ifindex (if the parms are device-bound), refcount, queue
 * limits, probe counts and all timer values.  Returns the value of
 * nla_nest_end() (>= 0) on success, -ENOBUFS if the nest could not be
 * started, or -EMSGSIZE if any attribute did not fit (the nest is then
 * cancelled so @skb is left unchanged).
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
        struct nlattr *nest;

        nest = nla_nest_start(skb, NDTA_PARMS);
        if (nest == NULL)
                return -ENOBUFS;

        if ((parms->dev &&
             nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
            nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
            nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
                        NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
            /* approximative value for deprecated QUEUE_LEN (in packets) */
            nla_put_u32(skb, NDTPA_QUEUE_LEN,
                        NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
            nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
            nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
            nla_put_u32(skb, NDTPA_UCAST_PROBES,
                        NEIGH_VAR(parms, UCAST_PROBES)) ||
            nla_put_u32(skb, NDTPA_MCAST_PROBES,
                        NEIGH_VAR(parms, MCAST_PROBES)) ||
            nla_put_u32(skb, NDTPA_MCAST_REPROBES,
                        NEIGH_VAR(parms, MCAST_REPROBES)) ||
            nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
                          NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
                          NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_GC_STALETIME,
                          NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
                          NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_RETRANS_TIME,
                          NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
                          NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_PROXY_DELAY,
                          NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_LOCKTIME,
                          NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
                goto nla_put_failure;
        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -EMSGSIZE;
}
1790
/* Build one RTM_NEWNEIGHTBL message describing @tbl: thresholds, the
 * NDTA_CONFIG snapshot, aggregated per-CPU statistics, and the table's
 * default parms.
 *
 * Holds tbl->lock (read, BH off) for the duration so the values are
 * mutually consistent; the hash table itself is sampled under
 * rcu_read_lock_bh().  Returns 0 on success or -EMSGSIZE (the partial
 * message is cancelled).
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
                              u32 pid, u32 seq, int type, int flags)
{
        struct nlmsghdr *nlh;
        struct ndtmsg *ndtmsg;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndtmsg = nlmsg_data(nlh);

        read_lock_bh(&tbl->lock);
        ndtmsg->ndtm_family = tbl->family;
        ndtmsg->ndtm_pad1   = 0;
        ndtmsg->ndtm_pad2   = 0;

        if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
            nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
            nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
            nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
            nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
                goto nla_put_failure;
        {
                unsigned long now = jiffies;
                /* Deltas are relative ages, reported in msecs below. */
                unsigned int flush_delta = now - tbl->last_flush;
                unsigned int rand_delta = now - tbl->last_rand;
                struct neigh_hash_table *nht;
                struct ndt_config ndc = {
                        .ndtc_key_len           = tbl->key_len,
                        .ndtc_entry_size        = tbl->entry_size,
                        .ndtc_entries           = atomic_read(&tbl->entries),
                        .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
                        .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
                        .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
                };

                /* Sample the RCU-managed hash table parameters. */
                rcu_read_lock_bh();
                nht = rcu_dereference_bh(tbl->nht);
                ndc.ndtc_hash_rnd = nht->hash_rnd[0];
                ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
                rcu_read_unlock_bh();

                if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
                        goto nla_put_failure;
        }

        {
                int cpu;
                struct ndt_stats ndst;

                memset(&ndst, 0, sizeof(ndst));

                /* Sum the per-CPU counters into one report. */
                for_each_possible_cpu(cpu) {
                        struct neigh_statistics *st;

                        st = per_cpu_ptr(tbl->stats, cpu);
                        ndst.ndts_allocs                += st->allocs;
                        ndst.ndts_destroys              += st->destroys;
                        ndst.ndts_hash_grows            += st->hash_grows;
                        ndst.ndts_res_failed            += st->res_failed;
                        ndst.ndts_lookups               += st->lookups;
                        ndst.ndts_hits                  += st->hits;
                        ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
                        ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
                        ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
                        ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
                        ndst.ndts_table_fulls           += st->table_fulls;
                }

                if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
                                  NDTA_PAD))
                        goto nla_put_failure;
        }

        /* The table's default parms must not be device-bound. */
        BUG_ON(tbl->parms.dev);
        if (neightbl_fill_parms(skb, &tbl->parms) < 0)
                goto nla_put_failure;

        read_unlock_bh(&tbl->lock);
        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure:
        read_unlock_bh(&tbl->lock);
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
1879
1880 static int neightbl_fill_param_info(struct sk_buff *skb,
1881                                     struct neigh_table *tbl,
1882                                     struct neigh_parms *parms,
1883                                     u32 pid, u32 seq, int type,
1884                                     unsigned int flags)
1885 {
1886         struct ndtmsg *ndtmsg;
1887         struct nlmsghdr *nlh;
1888
1889         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1890         if (nlh == NULL)
1891                 return -EMSGSIZE;
1892
1893         ndtmsg = nlmsg_data(nlh);
1894
1895         read_lock_bh(&tbl->lock);
1896         ndtmsg->ndtm_family = tbl->family;
1897         ndtmsg->ndtm_pad1   = 0;
1898         ndtmsg->ndtm_pad2   = 0;
1899
1900         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1901             neightbl_fill_parms(skb, parms) < 0)
1902                 goto errout;
1903
1904         read_unlock_bh(&tbl->lock);
1905         nlmsg_end(skb, nlh);
1906         return 0;
1907 errout:
1908         read_unlock_bh(&tbl->lock);
1909         nlmsg_cancel(skb, nlh);
1910         return -EMSGSIZE;
1911 }
1912
/* Validation policy for top-level NDTA_* attributes of RTM_SETNEIGHTBL. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
        [NDTA_NAME]             = { .type = NLA_STRING },
        [NDTA_THRESH1]          = { .type = NLA_U32 },
        [NDTA_THRESH2]          = { .type = NLA_U32 },
        [NDTA_THRESH3]          = { .type = NLA_U32 },
        [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
        [NDTA_PARMS]            = { .type = NLA_NESTED },
};
1921
/* Validation policy for the NDTPA_* attributes nested inside NDTA_PARMS
 * (times are u64 milliseconds, counters/limits are u32).
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
        [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
        [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
        [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
        [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
        [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_MCAST_REPROBES]          = { .type = NLA_U32 },
        [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
        [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
        [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
        [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
        [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
        [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
        [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
};
1938
/* RTM_SETNEIGHTBL handler: update tunables of a neighbour table.
 *
 * The table is selected by NDTA_NAME (mandatory) plus an optional
 * family filter.  NDTA_PARMS updates one neigh_parms set (chosen by
 * NDTPA_IFINDEX, 0 = table default); the gc thresholds and interval may
 * only be changed from the initial network namespace.
 *
 * Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct neigh_table *tbl;
        struct ndtmsg *ndtmsg;
        struct nlattr *tb[NDTA_MAX+1];
        bool found = false;
        int err, tidx;

        err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
                          nl_neightbl_policy, NULL);
        if (err < 0)
                goto errout;

        if (tb[NDTA_NAME] == NULL) {
                err = -EINVAL;
                goto errout;
        }

        ndtmsg = nlmsg_data(nlh);

        /* Locate the table by name (and family, if one was given). */
        for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
                tbl = neigh_tables[tidx];
                if (!tbl)
                        continue;
                if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
                        continue;
                if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
                        found = true;
                        break;
                }
        }

        if (!found)
                return -ENOENT;

        /*
         * We acquire tbl->lock to be nice to the periodic timers and
         * make sure they always see a consistent set of values.
         */
        write_lock_bh(&tbl->lock);

        if (tb[NDTA_PARMS]) {
                struct nlattr *tbp[NDTPA_MAX+1];
                struct neigh_parms *p;
                int i, ifindex = 0;

                err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
                                       nl_ntbl_parm_policy, NULL);
                if (err < 0)
                        goto errout_tbl_lock;

                if (tbp[NDTPA_IFINDEX])
                        ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

                /* ifindex 0 selects the table's default parms. */
                p = lookup_neigh_parms(tbl, net, ifindex);
                if (p == NULL) {
                        err = -ENOENT;
                        goto errout_tbl_lock;
                }

                /* Apply every attribute that was present. */
                for (i = 1; i <= NDTPA_MAX; i++) {
                        if (tbp[i] == NULL)
                                continue;

                        switch (i) {
                        case NDTPA_QUEUE_LEN:
                                /* Deprecated packet count; converted to bytes. */
                                NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
                                              nla_get_u32(tbp[i]) *
                                              SKB_TRUESIZE(ETH_FRAME_LEN));
                                break;
                        case NDTPA_QUEUE_LENBYTES:
                                NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_PROXY_QLEN:
                                NEIGH_VAR_SET(p, PROXY_QLEN,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_APP_PROBES:
                                NEIGH_VAR_SET(p, APP_PROBES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_UCAST_PROBES:
                                NEIGH_VAR_SET(p, UCAST_PROBES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_MCAST_PROBES:
                                NEIGH_VAR_SET(p, MCAST_PROBES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_MCAST_REPROBES:
                                NEIGH_VAR_SET(p, MCAST_REPROBES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_BASE_REACHABLE_TIME:
                                NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
                                              nla_get_msecs(tbp[i]));
                                /* update reachable_time as well, otherwise, the change will
                                 * only be effective after the next time neigh_periodic_work
                                 * decides to recompute it (can be multiple minutes)
                                 */
                                p->reachable_time =
                                        neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
                                break;
                        case NDTPA_GC_STALETIME:
                                NEIGH_VAR_SET(p, GC_STALETIME,
                                              nla_get_msecs(tbp[i]));
                                break;
                        case NDTPA_DELAY_PROBE_TIME:
                                NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
                                              nla_get_msecs(tbp[i]));
                                /* Let interested parties (e.g. drivers) react. */
                                call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
                                break;
                        case NDTPA_RETRANS_TIME:
                                NEIGH_VAR_SET(p, RETRANS_TIME,
                                              nla_get_msecs(tbp[i]));
                                break;
                        case NDTPA_ANYCAST_DELAY:
                                NEIGH_VAR_SET(p, ANYCAST_DELAY,
                                              nla_get_msecs(tbp[i]));
                                break;
                        case NDTPA_PROXY_DELAY:
                                NEIGH_VAR_SET(p, PROXY_DELAY,
                                              nla_get_msecs(tbp[i]));
                                break;
                        case NDTPA_LOCKTIME:
                                NEIGH_VAR_SET(p, LOCKTIME,
                                              nla_get_msecs(tbp[i]));
                                break;
                        }
                }
        }

        /* Global gc knobs may only be changed from init_net. */
        err = -ENOENT;
        if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
             tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
            !net_eq(net, &init_net))
                goto errout_tbl_lock;

        if (tb[NDTA_THRESH1])
                tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

        if (tb[NDTA_THRESH2])
                tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

        if (tb[NDTA_THRESH3])
                tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

        if (tb[NDTA_GC_INTERVAL])
                tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

        err = 0;

errout_tbl_lock:
        write_unlock_bh(&tbl->lock);
errout:
        return err;
}
2098
/* RTM_GETNEIGHTBL dump handler: for each table (optionally filtered by
 * family) emit the table info message followed by one message per
 * non-default neigh_parms set belonging to the requesting netns.
 *
 * Resume state lives in cb->args[0] (table index) and cb->args[1]
 * (parms index within the table) so a dump interrupted by a full skb
 * continues where it left off.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int family, tidx, nidx = 0;
        int tbl_skip = cb->args[0];
        int neigh_skip = cb->args[1];
        struct neigh_table *tbl;

        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
                struct neigh_parms *p;

                tbl = neigh_tables[tidx];
                if (!tbl)
                        continue;

                if (tidx < tbl_skip || (family && tbl->family != family))
                        continue;

                if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
                                       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
                                       NLM_F_MULTI) < 0)
                        break;

                nidx = 0;
                /* Skip tbl->parms itself: the default set was already
                 * reported inside neightbl_fill_info() above.
                 */
                p = list_next_entry(&tbl->parms, list);
                list_for_each_entry_from(p, &tbl->parms_list, list) {
                        if (!net_eq(neigh_parms_net(p), net))
                                continue;

                        if (nidx < neigh_skip)
                                goto next;

                        if (neightbl_fill_param_info(skb, tbl, p,
                                                     NETLINK_CB(cb->skb).portid,
                                                     cb->nlh->nlmsg_seq,
                                                     RTM_NEWNEIGHTBL,
                                                     NLM_F_MULTI) < 0)
                                goto out;
                next:
                        nidx++;
                }

                /* Later tables start from their first parms set. */
                neigh_skip = 0;
        }
out:
        cb->args[0] = tidx;
        cb->args[1] = nidx;

        return skb->len;
}
2151
/* Build one RTM_NEWNEIGH message describing @neigh: key, state, the
 * link-layer address when the entry is NUD_VALID, probe count and cache
 * timing info.  neigh->lock is taken (read, BH off) while sampling the
 * mutable fields.  Returns 0 on success or -EMSGSIZE with the partial
 * message cancelled.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
                           u32 pid, u32 seq, int type, unsigned int flags)
{
        unsigned long now = jiffies;
        struct nda_cacheinfo ci;
        struct nlmsghdr *nlh;
        struct ndmsg *ndm;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndm = nlmsg_data(nlh);
        ndm->ndm_family  = neigh->ops->family;
        ndm->ndm_pad1    = 0;
        ndm->ndm_pad2    = 0;
        ndm->ndm_flags   = neigh->flags;
        ndm->ndm_type    = neigh->type;
        ndm->ndm_ifindex = neigh->dev->ifindex;

        if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
                goto nla_put_failure;

        read_lock_bh(&neigh->lock);
        ndm->ndm_state   = neigh->nud_state;
        if (neigh->nud_state & NUD_VALID) {
                char haddr[MAX_ADDR_LEN];

                /* Snapshot the hw address so it can't change mid-copy. */
                neigh_ha_snapshot(haddr, neigh, neigh->dev);
                if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
                        read_unlock_bh(&neigh->lock);
                        goto nla_put_failure;
                }
        }

        /* Ages reported relative to now; refcnt minus our own reference. */
        ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
        ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
        ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
        ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
        read_unlock_bh(&neigh->lock);

        if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
            nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
                goto nla_put_failure;

        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2204
2205 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2206                             u32 pid, u32 seq, int type, unsigned int flags,
2207                             struct neigh_table *tbl)
2208 {
2209         struct nlmsghdr *nlh;
2210         struct ndmsg *ndm;
2211
2212         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2213         if (nlh == NULL)
2214                 return -EMSGSIZE;
2215
2216         ndm = nlmsg_data(nlh);
2217         ndm->ndm_family  = tbl->family;
2218         ndm->ndm_pad1    = 0;
2219         ndm->ndm_pad2    = 0;
2220         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2221         ndm->ndm_type    = RTN_UNICAST;
2222         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2223         ndm->ndm_state   = NUD_NONE;
2224
2225         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2226                 goto nla_put_failure;
2227
2228         nlmsg_end(skb, nlh);
2229         return 0;
2230
2231 nla_put_failure:
2232         nlmsg_cancel(skb, nlh);
2233         return -EMSGSIZE;
2234 }
2235
/* Announce a neighbour state change: first to the in-kernel netevent
 * chain, then to netlink listeners via RTM_NEWNEIGH.  @nlmsg_pid is the
 * portid of the requesting socket (0 for kernel-originated updates).
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2241
2242 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2243 {
2244         struct net_device *master;
2245
2246         if (!master_idx)
2247                 return false;
2248
2249         master = netdev_master_upper_dev_get(dev);
2250         if (!master || master->ifindex != master_idx)
2251                 return true;
2252
2253         return false;
2254 }
2255
2256 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2257 {
2258         if (filter_idx && dev->ifindex != filter_idx)
2259                 return true;
2260
2261         return false;
2262 }
2263
/* Dump all neighbour entries of @tbl into @skb, honouring optional
 * NDA_IFINDEX / NDA_MASTER filters from the request (which also set
 * NLM_F_DUMP_FILTERED on the replies).
 *
 * Walks the RCU-protected hash table under rcu_read_lock_bh(); resume
 * state is kept in cb->args[1] (bucket) and cb->args[2] (index within
 * the bucket).  Returns skb->len, or -1 when the skb filled up.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                            struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        const struct nlmsghdr *nlh = cb->nlh;
        struct nlattr *tb[NDA_MAX + 1];
        struct neighbour *n;
        int rc, h, s_h = cb->args[1];
        int idx, s_idx = idx = cb->args[2];
        struct neigh_hash_table *nht;
        int filter_master_idx = 0, filter_idx = 0;
        unsigned int flags = NLM_F_MULTI;
        int err;

        /* Filters are optional; a parse failure just means "no filters". */
        err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
        if (!err) {
                if (tb[NDA_IFINDEX])
                        filter_idx = nla_get_u32(tb[NDA_IFINDEX]);

                if (tb[NDA_MASTER])
                        filter_master_idx = nla_get_u32(tb[NDA_MASTER]);

                if (filter_idx || filter_master_idx)
                        flags |= NLM_F_DUMP_FILTERED;
        }

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);

        for (h = s_h; h < (1 << nht->hash_shift); h++) {
                if (h > s_h)
                        s_idx = 0;
                for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
                     n != NULL;
                     n = rcu_dereference_bh(n->next)) {
                        if (idx < s_idx || !net_eq(dev_net(n->dev), net))
                                goto next;
                        if (neigh_ifindex_filtered(n->dev, filter_idx) ||
                            neigh_master_filtered(n->dev, filter_master_idx))
                                goto next;
                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH,
                                            flags) < 0) {
                                rc = -1;
                                goto out;
                        }
next:
                        idx++;
                }
        }
        rc = skb->len;
out:
        rcu_read_unlock_bh();
        /* Save position so the next dump pass resumes here. */
        cb->args[1] = h;
        cb->args[2] = idx;
        return rc;
}
2322
/* Dump all proxy entries of @tbl (those in the requesting netns) into
 * @skb.  The proxy hash is protected by tbl->lock (read, BH off), not
 * RCU.  Resume state is kept in cb->args[3] (bucket) and cb->args[4]
 * (index).  Returns skb->len, or -1 when the skb filled up.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                             struct netlink_callback *cb)
{
        struct pneigh_entry *n;
        struct net *net = sock_net(skb->sk);
        int rc, h, s_h = cb->args[3];
        int idx, s_idx = idx = cb->args[4];

        read_lock_bh(&tbl->lock);

        for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
                if (h > s_h)
                        s_idx = 0;
                for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
                        if (idx < s_idx || pneigh_net(n) != net)
                                goto next;
                        if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH,
                                            NLM_F_MULTI, tbl) < 0) {
                                read_unlock_bh(&tbl->lock);
                                rc = -1;
                                goto out;
                        }
                next:
                        idx++;
                }
        }

        read_unlock_bh(&tbl->lock);
        rc = skb->len;
out:
        /* Save position so the next dump pass resumes here. */
        cb->args[3] = h;
        cb->args[4] = idx;
        return rc;

}
2360
/* RTM_GETNEIGH dump entry point: iterate all neighbour tables
 * (optionally filtered by family) and dump either the regular entries
 * or, when the request's ndm_flags equal NTF_PROXY, the proxy entries.
 *
 * cb->args[0] is the table cursor; the per-table cursors in
 * args[1..] are cleared whenever we move on to a new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct neigh_table *tbl;
        int t, family, s_t;
        int proxy = 0;
        int err;

        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        /* check for full ndmsg structure presence, family member is
         * the same for both structures
         */
        if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
            ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
                proxy = 1;

        s_t = cb->args[0];

        for (t = 0; t < NEIGH_NR_TABLES; t++) {
                tbl = neigh_tables[t];

                if (!tbl)
                        continue;
                if (t < s_t || (family && tbl->family != family))
                        continue;
                /* Entering a fresh table: reset the per-table cursors. */
                if (t > s_t)
                        memset(&cb->args[1], 0, sizeof(cb->args) -
                                                sizeof(cb->args[0]));
                if (proxy)
                        err = pneigh_dump_table(tbl, skb, cb);
                else
                        err = neigh_dump_table(tbl, skb, cb);
                if (err < 0)
                        break;
        }

        cb->args[0] = t;
        return skb->len;
}
2400
2401 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2402 {
2403         int chain;
2404         struct neigh_hash_table *nht;
2405
2406         rcu_read_lock_bh();
2407         nht = rcu_dereference_bh(tbl->nht);
2408
2409         read_lock(&tbl->lock); /* avoid resizes */
2410         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2411                 struct neighbour *n;
2412
2413                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2414                      n != NULL;
2415                      n = rcu_dereference_bh(n->next))
2416                         cb(n, cookie);
2417         }
2418         read_unlock(&tbl->lock);
2419         rcu_read_unlock_bh();
2420 }
2421 EXPORT_SYMBOL(neigh_for_each);
2422
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every entry of @tbl and let @cb decide (by returning non-zero)
 * whether the entry is unlinked from the hash chain, marked dead, and
 * released.  Entries @cb rejects are left in place.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
                              int (*cb)(struct neighbour *))
{
        int chain;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[chain];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        int release;

                        write_lock(&n->lock);
                        release = cb(n);
                        if (release) {
                                /* Unlink n; np stays put so the loop next
                                 * examines n's successor.
                                 */
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                        } else
                                np = &n->next;
                        write_unlock(&n->lock);
                        /* Release after dropping n->lock. */
                        if (release)
                                neigh_cleanup_and_release(n);
                }
        }
}
EXPORT_SYMBOL(__neigh_for_each_release);
2457
/* Transmit @skb towards @addr using the neighbour table selected by
 * @index (an NEIGH_*_TABLE value).  NEIGH_LINK_TABLE bypasses
 * neighbour resolution and builds the link-layer header directly.
 * The skb is consumed (freed) on error.  Returns the output path's
 * result or a negative errno.
 */
int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;
	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		rcu_read_lock_bh();
		/* lookup (or create) under RCU-bh so the entry cannot be
		 * freed before we call its output method
		 */
		neigh = __neigh_lookup_noref(tbl, addr, dev);
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	}
	else if (index == NEIGH_LINK_TABLE) {
		/* no resolution step: fill in the hardware header and
		 * queue the skb on the device directly
		 */
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);
2495
2496 #ifdef CONFIG_PROC_FS
2497
/* Return the first neighbour entry (in hash-bucket order) acceptable
 * to this seq iteration: it must belong to the iterator's netns, be
 * accepted by the optional per-family sub-iterator, and - when
 * NEIGH_SEQ_SKIP_NOARP is set - have some state beyond NUD_NOARP.
 * Runs under the rcu_read_lock_bh() taken in neigh_seq_start().
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	/* we are (re)starting the neigh half of the walk */
	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	/* remember the bucket so neigh_get_next() can resume here */
	state->bucket = bucket;

	return n;
}
2536
/* Advance from @n to the next acceptable neighbour entry, continuing
 * into later hash buckets once the current chain is exhausted.  The
 * same filters as neigh_get_first() apply.  When @pos is non-NULL it
 * is decremented for the entry returned, which lets neigh_get_idx()
 * count down to a target position.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	/* let the sub-iterator consume positions inside @n first */
	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2584
2585 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2586 {
2587         struct neighbour *n = neigh_get_first(seq);
2588
2589         if (n) {
2590                 --(*pos);
2591                 while (*pos) {
2592                         n = neigh_get_next(seq, n, pos);
2593                         if (!n)
2594                                 break;
2595                 }
2596         }
2597         return *pos ? NULL : n;
2598 }
2599
2600 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2601 {
2602         struct neigh_seq_state *state = seq->private;
2603         struct net *net = seq_file_net(seq);
2604         struct neigh_table *tbl = state->tbl;
2605         struct pneigh_entry *pn = NULL;
2606         int bucket = state->bucket;
2607
2608         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2609         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2610                 pn = tbl->phash_buckets[bucket];
2611                 while (pn && !net_eq(pneigh_net(pn), net))
2612                         pn = pn->next;
2613                 if (pn)
2614                         break;
2615         }
2616         state->bucket = bucket;
2617
2618         return pn;
2619 }
2620
2621 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2622                                             struct pneigh_entry *pn,
2623                                             loff_t *pos)
2624 {
2625         struct neigh_seq_state *state = seq->private;
2626         struct net *net = seq_file_net(seq);
2627         struct neigh_table *tbl = state->tbl;
2628
2629         do {
2630                 pn = pn->next;
2631         } while (pn && !net_eq(pneigh_net(pn), net));
2632
2633         while (!pn) {
2634                 if (++state->bucket > PNEIGH_HASHMASK)
2635                         break;
2636                 pn = tbl->phash_buckets[state->bucket];
2637                 while (pn && !net_eq(pneigh_net(pn), net))
2638                         pn = pn->next;
2639                 if (pn)
2640                         break;
2641         }
2642
2643         if (pn && pos)
2644                 --(*pos);
2645
2646         return pn;
2647 }
2648
2649 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2650 {
2651         struct pneigh_entry *pn = pneigh_get_first(seq);
2652
2653         if (pn) {
2654                 --(*pos);
2655                 while (*pos) {
2656                         pn = pneigh_get_next(seq, pn, pos);
2657                         if (!pn)
2658                                 break;
2659                 }
2660         }
2661         return *pos ? NULL : pn;
2662 }
2663
2664 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2665 {
2666         struct neigh_seq_state *state = seq->private;
2667         void *rc;
2668         loff_t idxpos = *pos;
2669
2670         rc = neigh_get_idx(seq, &idxpos);
2671         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2672                 rc = pneigh_get_idx(seq, &idxpos);
2673
2674         return rc;
2675 }
2676
/* Common seq_file ->start helper for neighbour tables (used by the
 * per-family procfs code).  Takes rcu_read_lock_bh() - released in
 * neigh_seq_stop() - and caches the current hash table.  Position 0
 * yields SEQ_START_TOKEN so the caller can print a header line.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* NEIGH_SEQ_IS_PNEIGH is iterator-internal, never a caller flag */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2692
/* Common seq_file ->next helper: iterate all neighbour entries first,
 * then (unless NEIGH_SEQ_NEIGH_ONLY) continue with the proxy entries.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* neigh half exhausted: switch over to proxy entries */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	/* seq_file contract: ->next always advances the position */
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2719
/* Common seq_file ->stop helper: drop the RCU-bh read lock taken in
 * neigh_seq_start().
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2726
2727 /* statistics via seq_file */
2728
/* seq_file ->start for the per-table statistics file: position 0 is
 * the header token, positions >= 1 map onto possible CPUs
 * (cpu == pos - 1); seq->private is the table, set at open time.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}
2745
2746 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2747 {
2748         struct neigh_table *tbl = seq->private;
2749         int cpu;
2750
2751         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2752                 if (!cpu_possible(cpu))
2753                         continue;
2754                 *pos = cpu+1;
2755                 return per_cpu_ptr(tbl->stats, cpu);
2756         }
2757         return NULL;
2758 }
2759
/* seq_file ->stop for the statistics file: nothing to release, since
 * ->start takes no locks.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2764
/* Emit one line of the statistics file: the column header for the
 * start token, otherwise the counters of the per-cpu record @v.  The
 * header and the printf field list below must stay in sync.  Note
 * tbl->entries is a table-global count, not a per-cpu one.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}
2799
/* seq_file callbacks for the per-table statistics file */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2806
2807 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2808 {
2809         int ret = seq_open(file, &neigh_stat_seq_ops);
2810
2811         if (!ret) {
2812                 struct seq_file *sf = file->private_data;
2813                 sf->private = PDE_DATA(inode);
2814         }
2815         return ret;
2816 };
2817
/* file_operations for the per-table statistics proc file */
static const struct file_operations neigh_stat_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = neigh_stat_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2825
2826 #endif /* CONFIG_PROC_FS */
2827
/* Upper bound on the netlink message size for one neighbour entry;
 * used to size the notification skb in __neigh_notify().  MAX_ADDR_LEN
 * over-reserves for short addresses, which is intentional (a too-small
 * estimate would make neigh_fill_info() fail with -EMSGSIZE).
 */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
2836
/* Broadcast a neighbour event of @type/@flags to RTNLGRP_NEIGH
 * listeners in the device's netns.  On allocation or fill failure the
 * error is recorded with rtnl_set_sk_err() so subscribed sockets see
 * it instead of silently missing the event.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2861
/* Send an RTM_GETNEIGH request to RTNLGRP_NEIGH listeners, asking a
 * user space resolver (e.g. arpd) to handle this neighbour.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
2867
2868 #ifdef CONFIG_SYSCTL
/* Shared min/max bounds for the proc handlers below */
static int zero;
static int int_max = INT_MAX;
/* cap for unres_qlen (in packets) so the byte conversion in
 * proc_unres_qlen() cannot overflow an int
 */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2872
/* Handler for the legacy "unres_qlen" sysctl, which is expressed in
 * packets while the backing value (QUEUE_LEN_BYTES) is stored in
 * bytes.  Converts with SKB_TRUESIZE(ETH_FRAME_LEN) in both
 * directions and clamps writes to [0, unres_qlen_max] via a shadow
 * ctl_table entry.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	/* present the stored byte count as an equivalent packet count */
	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2890
2891 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2892                                                    int family)
2893 {
2894         switch (family) {
2895         case AF_INET:
2896                 return __in_dev_arp_parms_get_rcu(dev);
2897         case AF_INET6:
2898                 return __in6_dev_nd_parms_get_rcu(dev);
2899         }
2900         return NULL;
2901 }
2902
/* Propagate a changed default parameter (@p->data[@index]) to every
 * device in @net whose own copy has not been explicitly overridden,
 * i.e. whose data_state bit for @index is still clear.
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}
2919
/* Post-write bookkeeping shared by the neigh sysctl handlers: mark the
 * value as explicitly set, notify netevent listeners when the delay
 * probe time changes, and - when this is the device-less "default"
 * entry - push the new value to all devices that haven't overridden it.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	/* which NEIGH_VAR_* slot this ctl_table entry points into */
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}
2936
2937 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2938                                            void __user *buffer,
2939                                            size_t *lenp, loff_t *ppos)
2940 {
2941         struct ctl_table tmp = *ctl;
2942         int ret;
2943
2944         tmp.extra1 = &zero;
2945         tmp.extra2 = &int_max;
2946
2947         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2948         neigh_proc_update(ctl, write);
2949         return ret;
2950 }
2951
2952 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2953                         void __user *buffer, size_t *lenp, loff_t *ppos)
2954 {
2955         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2956
2957         neigh_proc_update(ctl, write);
2958         return ret;
2959 }
2960 EXPORT_SYMBOL(neigh_proc_dointvec);
2961
2962 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
2963                                 void __user *buffer,
2964                                 size_t *lenp, loff_t *ppos)
2965 {
2966         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2967
2968         neigh_proc_update(ctl, write);
2969         return ret;
2970 }
2971 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
2972
2973 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
2974                                               void __user *buffer,
2975                                               size_t *lenp, loff_t *ppos)
2976 {
2977         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
2978
2979         neigh_proc_update(ctl, write);
2980         return ret;
2981 }
2982
2983 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
2984                                    void __user *buffer,
2985                                    size_t *lenp, loff_t *ppos)
2986 {
2987         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2988
2989         neigh_proc_update(ctl, write);
2990         return ret;
2991 }
2992 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
2993
2994 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
2995                                           void __user *buffer,
2996                                           size_t *lenp, loff_t *ppos)
2997 {
2998         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
2999
3000         neigh_proc_update(ctl, write);
3001         return ret;
3002 }
3003
/* Handler shared by "base_reachable_time" (seconds) and
 * "base_reachable_time_ms" (milliseconds): dispatch on the procname,
 * then refresh the derived reachable_time immediately on writes.
 */
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}
3028
/* offsetof()-style offset of slot @index within neigh_parms.data[];
 * neigh_sysctl_register() rebases it by adding the real parms pointer.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* Build one ctl_table entry for NEIGH_VAR_<attr>, backed by the
 * storage of NEIGH_VAR_<data_attr> and served by @proc.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* Convenience wrappers selecting the matching proc handler */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* "REUSED" variants expose @attr under @name while sharing the
 * storage of @data_attr (e.g. unres_qlen is a view of unres_qlen_bytes)
 */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3058
/* Template kmemdup()'ed for every neigh_sysctl_register() call.
 * Entries below NEIGH_VAR_GC_INTERVAL are per-neigh_parms and have
 * their .data offsets rebased onto the actual parms; the GC_* entries
 * are per-table and only kept for the device-less "default" directory.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
3113
/* Register the net/<ipv4|ipv6>/neigh/<dev|default> sysctl directory
 * for @p.  The template table is duplicated, its per-parms entries are
 * rebased onto @p, and - for the device-less default directory - the
 * table-wide GC knobs are wired up too.  A non-NULL @handler replaces
 * the retrans/reachable time handlers (protocol-specific behaviour).
 * Returns 0 on success or -ENOBUFS.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* turn the template's offsets into real pointers into @p and
	 * stash dev/parms for neigh_proc_update()
	 */
	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3202
3203 void neigh_sysctl_unregister(struct neigh_parms *p)
3204 {
3205         if (p->sysctl_table) {
3206                 struct neigh_sysctl_table *t = p->sysctl_table;
3207                 p->sysctl_table = NULL;
3208                 unregister_net_sysctl_table(t->sysctl_header);
3209                 kfree(t);
3210         }
3211 }
3212 EXPORT_SYMBOL(neigh_sysctl_unregister);
3213
3214 #endif  /* CONFIG_SYSCTL */
3215
/* Register the PF_UNSPEC rtnetlink handlers for neighbour and
 * neighbour-table messages.  Runs at subsys_initcall time.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3230