Merge branch 'v3.7-samsung-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel...
[platform/upstream/kernel-adaptation-pc.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
/*
 * Debug verbosity for this file:
 *   0 - NEIGH_PRINTK1 and NEIGH_PRINTK2 compile to nothing
 *   1 - NEIGH_PRINTK1 prints, NEIGH_PRINTK2 compiles to nothing
 *   2 - both print
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)

#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#else
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#else
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
#endif

/* Mask applied by pneigh_hash(); proxy buckets are indexed 0..PNEIGH_HASHMASK. */
#define PNEIGH_HASHMASK         0xF
59
/* Forward declarations of static helpers that are used before they are defined. */
60 static void neigh_timer_handler(unsigned long arg);
61 static void __neigh_notify(struct neighbour *n, int type, int flags);
62 static void neigh_update_notify(struct neighbour *neigh);
63 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
64
/* NOTE(review): presumably the head of the table list guarded by neigh_tbl_lock - confirm. */
65 static struct neigh_table *neigh_tables;
66 #ifdef CONFIG_PROC_FS
/* Forward declaration; only present when procfs support is configured. */
67 static const struct file_operations neigh_stat_seq_fops;
68 #endif
69
70 /*
71    Neighbour hash table buckets are protected with rwlock tbl->lock.
72
73    - All the scans/updates to hash buckets MUST be made under this lock.
74    - NOTHING clever should be done under this lock: no callbacks
75      to protocol backends, no attempts to send anything to the network.
76      Doing so will result in deadlocks if a backend/driver wants to use
77      the neighbour cache.
78    - If the entry requires some non-trivial actions, increase
79      its reference count and release table lock.
80
81    Neighbour entries are protected:
82    - with reference count.
83    - with rwlock neigh->lock
84
85    Reference count prevents destruction.
86
87    neigh->lock mainly serializes the link-layer address data and its validity state.
88    However, the same lock is also used to protect other entry fields:
89     - timer
90     - resolution queue
91
92    Again, nothing clever shall be made under neigh->lock,
93    the most complicated procedure, which we allow is dev->hard_header.
94    It is supposed, that dev->hard_header is simplistic and does
95    not make callbacks to neighbour tables.
96
97    The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting the
98    list of neighbour tables. This list is used only in process context.
99  */
100
/* Protects the list of neighbour tables; see the locking notes above. */
101 static DEFINE_RWLOCK(neigh_tbl_lock);
102
103 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
104 {
105         kfree_skb(skb);
106         return -ENETDOWN;
107 }
108
109 static void neigh_cleanup_and_release(struct neighbour *neigh)
110 {
111         if (neigh->parms->neigh_cleanup)
112                 neigh->parms->neigh_cleanup(neigh);
113
114         __neigh_notify(neigh, RTM_DELNEIGH, 0);
115         neigh_release(neigh);
116 }
117
/*
 * Pick a random reachable time in the interval (1/2)*base ... (3/2)*base.
 * This matches the default IPv6 behaviour and is deliberately not tunable.
 * A @base of zero yields zero.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
129
130
131 static int neigh_forced_gc(struct neigh_table *tbl)
132 {
133         int shrunk = 0;
134         int i;
135         struct neigh_hash_table *nht;
136
137         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
138
139         write_lock_bh(&tbl->lock);
140         nht = rcu_dereference_protected(tbl->nht,
141                                         lockdep_is_held(&tbl->lock));
142         for (i = 0; i < (1 << nht->hash_shift); i++) {
143                 struct neighbour *n;
144                 struct neighbour __rcu **np;
145
146                 np = &nht->hash_buckets[i];
147                 while ((n = rcu_dereference_protected(*np,
148                                         lockdep_is_held(&tbl->lock))) != NULL) {
149                         /* Neighbour record may be discarded if:
150                          * - nobody refers to it.
151                          * - it is not permanent
152                          */
153                         write_lock(&n->lock);
154                         if (atomic_read(&n->refcnt) == 1 &&
155                             !(n->nud_state & NUD_PERMANENT)) {
156                                 rcu_assign_pointer(*np,
157                                         rcu_dereference_protected(n->next,
158                                                   lockdep_is_held(&tbl->lock)));
159                                 n->dead = 1;
160                                 shrunk  = 1;
161                                 write_unlock(&n->lock);
162                                 neigh_cleanup_and_release(n);
163                                 continue;
164                         }
165                         write_unlock(&n->lock);
166                         np = &n->next;
167                 }
168         }
169
170         tbl->last_flush = jiffies;
171
172         write_unlock_bh(&tbl->lock);
173
174         return shrunk;
175 }
176
177 static void neigh_add_timer(struct neighbour *n, unsigned long when)
178 {
179         neigh_hold(n);
180         if (unlikely(mod_timer(&n->timer, when))) {
181                 printk("NEIGH: BUG, double timer add, state is %x\n",
182                        n->nud_state);
183                 dump_stack();
184         }
185 }
186
187 static int neigh_del_timer(struct neighbour *n)
188 {
189         if ((n->nud_state & NUD_IN_TIMER) &&
190             del_timer(&n->timer)) {
191                 neigh_release(n);
192                 return 1;
193         }
194         return 0;
195 }
196
197 static void pneigh_queue_purge(struct sk_buff_head *list)
198 {
199         struct sk_buff *skb;
200
201         while ((skb = skb_dequeue(list)) != NULL) {
202                 dev_put(skb->dev);
203                 kfree_skb(skb);
204         }
205 }
206
/*
 * Remove from @tbl every neighbour entry bound to @dev, or every entry when
 * @dev is NULL.  Each removed entry has its timer cancelled, is marked dead
 * and is then handed to neigh_cleanup_and_release().  Entries that are still
 * referenced elsewhere are additionally moved to a harmless state (queue
 * purged, output set to neigh_blackhole).
 *
 * Must be called with tbl->lock write-held; the callers in this file take it
 * with write_lock_bh().
 */
207 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
208 {
209         int i;
210         struct neigh_hash_table *nht;
211
212         nht = rcu_dereference_protected(tbl->nht,
213                                         lockdep_is_held(&tbl->lock));
214
215         for (i = 0; i < (1 << nht->hash_shift); i++) {
216                 struct neighbour *n;
217                 struct neighbour __rcu **np = &nht->hash_buckets[i];
218
219                 while ((n = rcu_dereference_protected(*np,
220                                         lockdep_is_held(&tbl->lock))) != NULL) {
                        /* Entry belongs to another device: keep it and advance. */
221                         if (dev && n->dev != dev) {
222                                 np = &n->next;
223                                 continue;
224                         }
                        /* Unlink n; np is left alone so the successor is examined next. */
225                         rcu_assign_pointer(*np,
226                                    rcu_dereference_protected(n->next,
227                                                 lockdep_is_held(&tbl->lock)));
228                         write_lock(&n->lock);
229                         neigh_del_timer(n);
230                         n->dead = 1;
231
232                         if (atomic_read(&n->refcnt) != 1) {
233                                 /* The most unpleasant situation.
234                                    We must destroy neighbour entry,
235                                    but someone still uses it.
236
237                                    The destroy will be delayed until
238                                    the last user releases us, but
239                                    we must kill timers etc. and move
240                                    it to safe state.
241                                  */
242                                 skb_queue_purge(&n->arp_queue);
243                                 n->arp_queue_len_bytes = 0;
244                                 n->output = neigh_blackhole;
245                                 if (n->nud_state & NUD_VALID)
246                                         n->nud_state = NUD_NOARP;
247                                 else
248                                         n->nud_state = NUD_NONE;
249                                 NEIGH_PRINTK2("neigh %p is stray.\n", n);
250                         }
251                         write_unlock(&n->lock);
252                         neigh_cleanup_and_release(n);
253                 }
254         }
255 }
256
257 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
258 {
259         write_lock_bh(&tbl->lock);
260         neigh_flush_dev(tbl, dev);
261         write_unlock_bh(&tbl->lock);
262 }
263 EXPORT_SYMBOL(neigh_changeaddr);
264
265 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
266 {
267         write_lock_bh(&tbl->lock);
268         neigh_flush_dev(tbl, dev);
269         pneigh_ifdown(tbl, dev);
270         write_unlock_bh(&tbl->lock);
271
272         del_timer_sync(&tbl->proxy_timer);
273         pneigh_queue_purge(&tbl->proxy_queue);
274         return 0;
275 }
276 EXPORT_SYMBOL(neigh_ifdown);
277
278 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
279 {
280         struct neighbour *n = NULL;
281         unsigned long now = jiffies;
282         int entries;
283
284         entries = atomic_inc_return(&tbl->entries) - 1;
285         if (entries >= tbl->gc_thresh3 ||
286             (entries >= tbl->gc_thresh2 &&
287              time_after(now, tbl->last_flush + 5 * HZ))) {
288                 if (!neigh_forced_gc(tbl) &&
289                     entries >= tbl->gc_thresh3)
290                         goto out_entries;
291         }
292
293         if (tbl->entry_size)
294                 n = kzalloc(tbl->entry_size, GFP_ATOMIC);
295         else {
296                 int sz = sizeof(*n) + tbl->key_len;
297
298                 sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
299                 sz += dev->neigh_priv_len;
300                 n = kzalloc(sz, GFP_ATOMIC);
301         }
302         if (!n)
303                 goto out_entries;
304
305         skb_queue_head_init(&n->arp_queue);
306         rwlock_init(&n->lock);
307         seqlock_init(&n->ha_lock);
308         n->updated        = n->used = now;
309         n->nud_state      = NUD_NONE;
310         n->output         = neigh_blackhole;
311         seqlock_init(&n->hh.hh_lock);
312         n->parms          = neigh_parms_clone(&tbl->parms);
313         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
314
315         NEIGH_CACHE_STAT_INC(tbl, allocs);
316         n->tbl            = tbl;
317         atomic_set(&n->refcnt, 1);
318         n->dead           = 1;
319 out:
320         return n;
321
322 out_entries:
323         atomic_dec(&tbl->entries);
324         goto out;
325 }
326
327 static void neigh_get_hash_rnd(u32 *x)
328 {
329         get_random_bytes(x, sizeof(*x));
330         *x |= 1;
331 }
332
333 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
334 {
335         size_t size = (1 << shift) * sizeof(struct neighbour *);
336         struct neigh_hash_table *ret;
337         struct neighbour __rcu **buckets;
338         int i;
339
340         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
341         if (!ret)
342                 return NULL;
343         if (size <= PAGE_SIZE)
344                 buckets = kzalloc(size, GFP_ATOMIC);
345         else
346                 buckets = (struct neighbour __rcu **)
347                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
348                                            get_order(size));
349         if (!buckets) {
350                 kfree(ret);
351                 return NULL;
352         }
353         ret->hash_buckets = buckets;
354         ret->hash_shift = shift;
355         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
356                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
357         return ret;
358 }
359
360 static void neigh_hash_free_rcu(struct rcu_head *head)
361 {
362         struct neigh_hash_table *nht = container_of(head,
363                                                     struct neigh_hash_table,
364                                                     rcu);
365         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
366         struct neighbour __rcu **buckets = nht->hash_buckets;
367
368         if (size <= PAGE_SIZE)
369                 kfree(buckets);
370         else
371                 free_pages((unsigned long)buckets, get_order(size));
372         kfree(nht);
373 }
374
/*
 * Replace the table's hash with one of 1 << @new_shift buckets.
 *
 * Every entry of the old hash is re-hashed with the new table's random seeds
 * and pushed onto the head of its new bucket.  The new hash is then published
 * in tbl->nht and the old one is freed through call_rcu().
 *
 * Must be called with tbl->lock held (see the lockdep annotations).
 * Returns the new hash, or the unchanged old hash if allocation fails.
 */
375 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
376                                                 unsigned long new_shift)
377 {
378         unsigned int i, hash;
379         struct neigh_hash_table *new_nht, *old_nht;
380
381         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
382
383         old_nht = rcu_dereference_protected(tbl->nht,
384                                             lockdep_is_held(&tbl->lock));
385         new_nht = neigh_hash_alloc(new_shift);
386         if (!new_nht)
387                 return old_nht;
388
389         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
390                 struct neighbour *n, *next;
391
392                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
393                                                    lockdep_is_held(&tbl->lock));
394                      n != NULL;
395                      n = next) {
396                         hash = tbl->hash(n->primary_key, n->dev,
397                                          new_nht->hash_rnd);
398
                        /* The top hash_shift bits of the 32-bit hash select the bucket. */
399                         hash >>= (32 - new_nht->hash_shift);
                        /* Remember the old successor before n->next is rewritten. */
400                         next = rcu_dereference_protected(n->next,
401                                                 lockdep_is_held(&tbl->lock));
402
                        /* Insert n at the head of its bucket in the new hash. */
403                         rcu_assign_pointer(n->next,
404                                            rcu_dereference_protected(
405                                                 new_nht->hash_buckets[hash],
406                                                 lockdep_is_held(&tbl->lock)));
407                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
408                 }
409         }
410
411         rcu_assign_pointer(tbl->nht, new_nht);
412         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
413         return new_nht;
414 }
415
416 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
417                                struct net_device *dev)
418 {
419         struct neighbour *n;
420         int key_len = tbl->key_len;
421         u32 hash_val;
422         struct neigh_hash_table *nht;
423
424         NEIGH_CACHE_STAT_INC(tbl, lookups);
425
426         rcu_read_lock_bh();
427         nht = rcu_dereference_bh(tbl->nht);
428         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
429
430         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
431              n != NULL;
432              n = rcu_dereference_bh(n->next)) {
433                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
434                         if (!atomic_inc_not_zero(&n->refcnt))
435                                 n = NULL;
436                         NEIGH_CACHE_STAT_INC(tbl, hits);
437                         break;
438                 }
439         }
440
441         rcu_read_unlock_bh();
442         return n;
443 }
444 EXPORT_SYMBOL(neigh_lookup);
445
446 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
447                                      const void *pkey)
448 {
449         struct neighbour *n;
450         int key_len = tbl->key_len;
451         u32 hash_val;
452         struct neigh_hash_table *nht;
453
454         NEIGH_CACHE_STAT_INC(tbl, lookups);
455
456         rcu_read_lock_bh();
457         nht = rcu_dereference_bh(tbl->nht);
458         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
459
460         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
461              n != NULL;
462              n = rcu_dereference_bh(n->next)) {
463                 if (!memcmp(n->primary_key, pkey, key_len) &&
464                     net_eq(dev_net(n->dev), net)) {
465                         if (!atomic_inc_not_zero(&n->refcnt))
466                                 n = NULL;
467                         NEIGH_CACHE_STAT_INC(tbl, hits);
468                         break;
469                 }
470         }
471
472         rcu_read_unlock_bh();
473         return n;
474 }
475 EXPORT_SYMBOL(neigh_lookup_nodev);
476
/*
 * Create the entry for (@pkey, @dev) in @tbl, or return the existing one.
 *
 * A new entry is allocated and run through the table constructor, the
 * device's ndo_neigh_construct hook and the parms neigh_setup hook (each
 * optional).  It is then inserted at the head of its bucket under tbl->lock,
 * growing the hash first if the entry count exceeds the bucket count.
 *
 * If an entry with the same device and key is already present, that entry is
 * returned and the new one is released.  When @want_ref is true the returned
 * entry carries an extra reference for the caller.
 *
 * Returns the entry, or an ERR_PTR(): -ENOBUFS if allocation fails, the
 * negative code from a failing hook, or -EINVAL if the entry's parms are
 * marked dead.
 */
477 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
478                                  struct net_device *dev, bool want_ref)
479 {
480         u32 hash_val;
481         int key_len = tbl->key_len;
482         int error;
483         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
484         struct neigh_hash_table *nht;
485
486         if (!n) {
487                 rc = ERR_PTR(-ENOBUFS);
488                 goto out;
489         }
490
491         memcpy(n->primary_key, pkey, key_len);
492         n->dev = dev;
        /* The entry keeps a device reference; neigh_destroy() drops it. */
493         dev_hold(dev);
494
495         /* Protocol specific setup. */
496         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
497                 rc = ERR_PTR(error);
498                 goto out_neigh_release;
499         }
500
501         if (dev->netdev_ops->ndo_neigh_construct) {
502                 error = dev->netdev_ops->ndo_neigh_construct(n);
503                 if (error < 0) {
504                         rc = ERR_PTR(error);
505                         goto out_neigh_release;
506                 }
507         }
508
509         /* Device specific setup. */
510         if (n->parms->neigh_setup &&
511             (error = n->parms->neigh_setup(n)) < 0) {
512                 rc = ERR_PTR(error);
513                 goto out_neigh_release;
514         }
515
        /* Backdate "confirmed" by twice base_reachable_time (presumably so a new entry never looks freshly confirmed). */
516         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
517
518         write_lock_bh(&tbl->lock);
519         nht = rcu_dereference_protected(tbl->nht,
520                                         lockdep_is_held(&tbl->lock));
521
        /* More entries than buckets: double the hash size. */
522         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
523                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
524
525         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
526
527         if (n->parms->dead) {
528                 rc = ERR_PTR(-EINVAL);
529                 goto out_tbl_unlock;
530         }
531
        /* Someone may have inserted the same key meanwhile: reuse that entry. */
532         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
533                                             lockdep_is_held(&tbl->lock));
534              n1 != NULL;
535              n1 = rcu_dereference_protected(n1->next,
536                         lockdep_is_held(&tbl->lock))) {
537                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
538                         if (want_ref)
539                                 neigh_hold(n1);
540                         rc = n1;
541                         goto out_tbl_unlock;
542                 }
543         }
544
        /* Not found: mark the new entry alive and link it at the bucket head. */
545         n->dead = 0;
546         if (want_ref)
547                 neigh_hold(n);
548         rcu_assign_pointer(n->next,
549                            rcu_dereference_protected(nht->hash_buckets[hash_val],
550                                                      lockdep_is_held(&tbl->lock)));
551         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
552         write_unlock_bh(&tbl->lock);
553         NEIGH_PRINTK2("neigh %p is created.\n", n);
554         rc = n;
555 out:
556         return rc;
557 out_tbl_unlock:
558         write_unlock_bh(&tbl->lock);
559 out_neigh_release:
        /* Drop the never-inserted entry (still marked dead by neigh_alloc()). */
560         neigh_release(n);
561         goto out;
562 }
563 EXPORT_SYMBOL(__neigh_create);
564
565 static u32 pneigh_hash(const void *pkey, int key_len)
566 {
567         u32 hash_val = *(u32 *)(pkey + key_len - 4);
568         hash_val ^= (hash_val >> 16);
569         hash_val ^= hash_val >> 8;
570         hash_val ^= hash_val >> 4;
571         hash_val &= PNEIGH_HASHMASK;
572         return hash_val;
573 }
574
575 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
576                                               struct net *net,
577                                               const void *pkey,
578                                               int key_len,
579                                               struct net_device *dev)
580 {
581         while (n) {
582                 if (!memcmp(n->key, pkey, key_len) &&
583                     net_eq(pneigh_net(n), net) &&
584                     (n->dev == dev || !n->dev))
585                         return n;
586                 n = n->next;
587         }
588         return NULL;
589 }
590
591 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
592                 struct net *net, const void *pkey, struct net_device *dev)
593 {
594         int key_len = tbl->key_len;
595         u32 hash_val = pneigh_hash(pkey, key_len);
596
597         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
598                                  net, pkey, key_len, dev);
599 }
600 EXPORT_SYMBOL_GPL(__pneigh_lookup);
601
602 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
603                                     struct net *net, const void *pkey,
604                                     struct net_device *dev, int creat)
605 {
606         struct pneigh_entry *n;
607         int key_len = tbl->key_len;
608         u32 hash_val = pneigh_hash(pkey, key_len);
609
610         read_lock_bh(&tbl->lock);
611         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
612                               net, pkey, key_len, dev);
613         read_unlock_bh(&tbl->lock);
614
615         if (n || !creat)
616                 goto out;
617
618         ASSERT_RTNL();
619
620         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
621         if (!n)
622                 goto out;
623
624         write_pnet(&n->net, hold_net(net));
625         memcpy(n->key, pkey, key_len);
626         n->dev = dev;
627         if (dev)
628                 dev_hold(dev);
629
630         if (tbl->pconstructor && tbl->pconstructor(n)) {
631                 if (dev)
632                         dev_put(dev);
633                 release_net(net);
634                 kfree(n);
635                 n = NULL;
636                 goto out;
637         }
638
639         write_lock_bh(&tbl->lock);
640         n->next = tbl->phash_buckets[hash_val];
641         tbl->phash_buckets[hash_val] = n;
642         write_unlock_bh(&tbl->lock);
643 out:
644         return n;
645 }
646 EXPORT_SYMBOL(pneigh_lookup);
647
648
649 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
650                   struct net_device *dev)
651 {
652         struct pneigh_entry *n, **np;
653         int key_len = tbl->key_len;
654         u32 hash_val = pneigh_hash(pkey, key_len);
655
656         write_lock_bh(&tbl->lock);
657         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
658              np = &n->next) {
659                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
660                     net_eq(pneigh_net(n), net)) {
661                         *np = n->next;
662                         write_unlock_bh(&tbl->lock);
663                         if (tbl->pdestructor)
664                                 tbl->pdestructor(n);
665                         if (n->dev)
666                                 dev_put(n->dev);
667                         release_net(pneigh_net(n));
668                         kfree(n);
669                         return 0;
670                 }
671         }
672         write_unlock_bh(&tbl->lock);
673         return -ENOENT;
674 }
675
676 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
677 {
678         struct pneigh_entry *n, **np;
679         u32 h;
680
681         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
682                 np = &tbl->phash_buckets[h];
683                 while ((n = *np) != NULL) {
684                         if (!dev || n->dev == dev) {
685                                 *np = n->next;
686                                 if (tbl->pdestructor)
687                                         tbl->pdestructor(n);
688                                 if (n->dev)
689                                         dev_put(n->dev);
690                                 release_net(pneigh_net(n));
691                                 kfree(n);
692                                 continue;
693                         }
694                         np = &n->next;
695                 }
696         }
697         return -ENOENT;
698 }
699
700 static void neigh_parms_destroy(struct neigh_parms *parms);
701
702 static inline void neigh_parms_put(struct neigh_parms *parms)
703 {
704         if (atomic_dec_and_test(&parms->refcnt))
705                 neigh_parms_destroy(parms);
706 }
707
708 /*
709  *      neighbour must already be out of the table;
710  *
711  */
712 void neigh_destroy(struct neighbour *neigh)
713 {
714         struct net_device *dev = neigh->dev;
715
716         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
717
718         if (!neigh->dead) {
719                 pr_warn("Destroying alive neighbour %p\n", neigh);
720                 dump_stack();
721                 return;
722         }
723
724         if (neigh_del_timer(neigh))
725                 pr_warn("Impossible event\n");
726
727         skb_queue_purge(&neigh->arp_queue);
728         neigh->arp_queue_len_bytes = 0;
729
730         if (dev->netdev_ops->ndo_neigh_destroy)
731                 dev->netdev_ops->ndo_neigh_destroy(neigh);
732
733         dev_put(dev);
734         neigh_parms_put(neigh->parms);
735
736         NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
737
738         atomic_dec(&neigh->tbl->entries);
739         kfree_rcu(neigh, rcu);
740 }
741 EXPORT_SYMBOL(neigh_destroy);
742
743 /* Neighbour state is suspicious;
744    disable fast path.
745
746    Called with write_locked neigh.
747  */
748 static void neigh_suspect(struct neighbour *neigh)
749 {
750         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
751
752         neigh->output = neigh->ops->output;
753 }
754
755 /* Neighbour state is OK;
756    enable fast path.
757
758    Called with write_locked neigh.
759  */
760 static void neigh_connect(struct neighbour *neigh)
761 {
762         NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
763
764         neigh->output = neigh->ops->connected_output;
765 }
766
/*
 * Delayed-work handler performing periodic garbage collection for one table.
 *
 * Once more than 300 seconds have passed since tbl->last_rand, reachable_time
 * is re-randomised for every parms block on the table's list.  Every bucket
 * is then scanned; an entry is unlinked when nobody else references it
 * (refcnt == 1) and it is either NUD_FAILED or has been unused for longer
 * than gc_staletime.  Entries that are permanent or have a timer state are
 * skipped.
 *
 * The table lock is dropped between buckets to allow rescheduling.  The work
 * re-arms itself to run again after base_reachable_time / 2.
 */
767 static void neigh_periodic_work(struct work_struct *work)
768 {
769         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
770         struct neighbour *n;
771         struct neighbour __rcu **np;
772         unsigned int i;
773         struct neigh_hash_table *nht;
774
775         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
776
777         write_lock_bh(&tbl->lock);
778         nht = rcu_dereference_protected(tbl->nht,
779                                         lockdep_is_held(&tbl->lock));
780
781         /*
782          *      periodically recompute ReachableTime from random function
783          */
784
785         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
786                 struct neigh_parms *p;
787                 tbl->last_rand = jiffies;
788                 for (p = &tbl->parms; p; p = p->next)
789                         p->reachable_time =
790                                 neigh_rand_reach_time(p->base_reachable_time);
791         }
792
793         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
794                 np = &nht->hash_buckets[i];
795
796                 while ((n = rcu_dereference_protected(*np,
797                                 lockdep_is_held(&tbl->lock))) != NULL) {
798                         unsigned int state;
799
800                         write_lock(&n->lock);
801
802                         state = n->nud_state;
                        /* Permanent entries and entries in a timer state are left alone. */
803                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
804                                 write_unlock(&n->lock);
805                                 goto next_elt;
806                         }
807
                        /* A confirmation also counts as use. */
808                         if (time_before(n->used, n->confirmed))
809                                 n->used = n->confirmed;
810
811                         if (atomic_read(&n->refcnt) == 1 &&
812                             (state == NUD_FAILED ||
813                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
                                /* Unlink; np is unchanged so the successor is examined next. */
814                                 *np = n->next;
815                                 n->dead = 1;
816                                 write_unlock(&n->lock);
817                                 neigh_cleanup_and_release(n);
818                                 continue;
819                         }
820                         write_unlock(&n->lock);
821
822 next_elt:
823                         np = &n->next;
824                 }
825                 /*
826                  * It's fine to release lock here, even if hash table
827                  * grows while we are preempted.
828                  */
829                 write_unlock_bh(&tbl->lock);
830                 cond_resched();
831                 write_lock_bh(&tbl->lock);
                /* Reload: the hash may have been replaced while the lock was dropped. */
832                 nht = rcu_dereference_protected(tbl->nht,
833                                                 lockdep_is_held(&tbl->lock));
834         }
835         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
836          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
837          * base_reachable_time.
838          */
839         schedule_delayed_work(&tbl->gc_work,
840                               tbl->parms.base_reachable_time >> 1);
841         write_unlock_bh(&tbl->lock);
842 }
843
844 static __inline__ int neigh_max_probes(struct neighbour *n)
845 {
846         struct neigh_parms *p = n->parms;
847         return (n->nud_state & NUD_PROBE) ?
848                 p->ucast_probes :
849                 p->ucast_probes + p->app_probes + p->mcast_probes;
850 }
851
/*
 * Handle a failed resolution: account the failure, then report every queued
 * packet through ops->error_report and discard whatever remains queued.
 *
 * Entered and left with neigh->lock write-held; the lock is dropped around
 * each error_report call (see the __releases/__acquires annotations).
 */
852 static void neigh_invalidate(struct neighbour *neigh)
853         __releases(neigh->lock)
854         __acquires(neigh->lock)
855 {
856         struct sk_buff *skb;
857
858         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
859         NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
860         neigh->updated = jiffies;
861
862         /* It is very thin place. report_unreachable is very complicated
863            routine. Particularly, it can hit the same neighbour entry!
864
865            So that, we try to be accurate and avoid dead loop. --ANK
866          */
        /* The state is re-checked each round because it may change while the lock is dropped. */
867         while (neigh->nud_state == NUD_FAILED &&
868                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
869                 write_unlock(&neigh->lock);
870                 neigh->ops->error_report(neigh, skb);
871                 write_lock(&neigh->lock);
872         }
        /* Drop anything still queued, e.g. if the loop stopped on a state change. */
873         skb_queue_purge(&neigh->arp_queue);
874         neigh->arp_queue_len_bytes = 0;
875 }
876
877 static void neigh_probe(struct neighbour *neigh)
878         __releases(neigh->lock)
879 {
880         struct sk_buff *skb = skb_peek(&neigh->arp_queue);
881         /* keep skb alive even if arp_queue overflows */
882         if (skb)
883                 skb = skb_copy(skb, GFP_ATOMIC);
884         write_unlock(&neigh->lock);
885         neigh->ops->solicit(neigh, skb);
886         atomic_inc(&neigh->probes);
887         kfree_skb(skb);
888 }
889
890 /* Called when a timer expires for a neighbour entry. */
891
892 static void neigh_timer_handler(unsigned long arg)
893 {
894         unsigned long now, next;
895         struct neighbour *neigh = (struct neighbour *)arg;
896         unsigned int state;
897         int notify = 0;
898
899         write_lock(&neigh->lock);
900
901         state = neigh->nud_state;
902         now = jiffies;
903         next = now + HZ;
904
905         if (!(state & NUD_IN_TIMER))
906                 goto out;
907
908         if (state & NUD_REACHABLE) {
909                 if (time_before_eq(now,
910                                    neigh->confirmed + neigh->parms->reachable_time)) {
911                         NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
912                         next = neigh->confirmed + neigh->parms->reachable_time;
913                 } else if (time_before_eq(now,
914                                           neigh->used + neigh->parms->delay_probe_time)) {
915                         NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
916                         neigh->nud_state = NUD_DELAY;
917                         neigh->updated = jiffies;
918                         neigh_suspect(neigh);
919                         next = now + neigh->parms->delay_probe_time;
920                 } else {
921                         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
922                         neigh->nud_state = NUD_STALE;
923                         neigh->updated = jiffies;
924                         neigh_suspect(neigh);
925                         notify = 1;
926                 }
927         } else if (state & NUD_DELAY) {
928                 if (time_before_eq(now,
929                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
930                         NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
931                         neigh->nud_state = NUD_REACHABLE;
932                         neigh->updated = jiffies;
933                         neigh_connect(neigh);
934                         notify = 1;
935                         next = neigh->confirmed + neigh->parms->reachable_time;
936                 } else {
937                         NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
938                         neigh->nud_state = NUD_PROBE;
939                         neigh->updated = jiffies;
940                         atomic_set(&neigh->probes, 0);
941                         next = now + neigh->parms->retrans_time;
942                 }
943         } else {
944                 /* NUD_PROBE|NUD_INCOMPLETE */
945                 next = now + neigh->parms->retrans_time;
946         }
947
948         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
949             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
950                 neigh->nud_state = NUD_FAILED;
951                 notify = 1;
952                 neigh_invalidate(neigh);
953         }
954
955         if (neigh->nud_state & NUD_IN_TIMER) {
956                 if (time_before(next, jiffies + HZ/2))
957                         next = jiffies + HZ/2;
958                 if (!mod_timer(&neigh->timer, next))
959                         neigh_hold(neigh);
960         }
961         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
962                 neigh_probe(neigh);
963         } else {
964 out:
965                 write_unlock(&neigh->lock);
966         }
967
968         if (notify)
969                 neigh_update_notify(neigh);
970
971         neigh_release(neigh);
972 }
973
974 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
975 {
976         int rc;
977         bool immediate_probe = false;
978
979         write_lock_bh(&neigh->lock);
980
981         rc = 0;
982         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
983                 goto out_unlock_bh;
984
985         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
986                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
987                         unsigned long next, now = jiffies;
988
989                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
990                         neigh->nud_state     = NUD_INCOMPLETE;
991                         neigh->updated = now;
992                         next = now + max(neigh->parms->retrans_time, HZ/2);
993                         neigh_add_timer(neigh, next);
994                         immediate_probe = true;
995                 } else {
996                         neigh->nud_state = NUD_FAILED;
997                         neigh->updated = jiffies;
998                         write_unlock_bh(&neigh->lock);
999
1000                         kfree_skb(skb);
1001                         return 1;
1002                 }
1003         } else if (neigh->nud_state & NUD_STALE) {
1004                 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
1005                 neigh->nud_state = NUD_DELAY;
1006                 neigh->updated = jiffies;
1007                 neigh_add_timer(neigh,
1008                                 jiffies + neigh->parms->delay_probe_time);
1009         }
1010
1011         if (neigh->nud_state == NUD_INCOMPLETE) {
1012                 if (skb) {
1013                         while (neigh->arp_queue_len_bytes + skb->truesize >
1014                                neigh->parms->queue_len_bytes) {
1015                                 struct sk_buff *buff;
1016
1017                                 buff = __skb_dequeue(&neigh->arp_queue);
1018                                 if (!buff)
1019                                         break;
1020                                 neigh->arp_queue_len_bytes -= buff->truesize;
1021                                 kfree_skb(buff);
1022                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1023                         }
1024                         skb_dst_force(skb);
1025                         __skb_queue_tail(&neigh->arp_queue, skb);
1026                         neigh->arp_queue_len_bytes += skb->truesize;
1027                 }
1028                 rc = 1;
1029         }
1030 out_unlock_bh:
1031         if (immediate_probe)
1032                 neigh_probe(neigh);
1033         else
1034                 write_unlock(&neigh->lock);
1035         local_bh_enable();
1036         return rc;
1037 }
1038 EXPORT_SYMBOL(__neigh_event_send);
1039
1040 static void neigh_update_hhs(struct neighbour *neigh)
1041 {
1042         struct hh_cache *hh;
1043         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1044                 = NULL;
1045
1046         if (neigh->dev->header_ops)
1047                 update = neigh->dev->header_ops->cache_update;
1048
1049         if (update) {
1050                 hh = &neigh->hh;
1051                 if (hh->hh_len) {
1052                         write_seqlock_bh(&hh->hh_lock);
1053                         update(hh, neigh->dev, neigh->ha);
1054                         write_sequnlock_bh(&hh->hh_lock);
1055                 }
1056         }
1057 }
1058
1059
1060
1061 /* Generic update routine.
1062    -- lladdr is new lladdr or NULL, if it is not supplied.
1063    -- new    is new state.
1064    -- flags
1065         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1066                                 if it is different.
1067         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1068                                 lladdr instead of overriding it
1069                                 if it is different.
1070                                 It also allows to retain current state
1071                                 if lladdr is unchanged.
1072         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1073
1074         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1075                                 NTF_ROUTER flag.
1076         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1077                                 a router.
1078
1079    Caller MUST hold reference count on the entry.
1080  */
1081
1082 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1083                  u32 flags)
1084 {
1085         u8 old;
1086         int err;
1087         int notify = 0;
1088         struct net_device *dev;
1089         int update_isrouter = 0;
1090
1091         write_lock_bh(&neigh->lock);
1092
1093         dev    = neigh->dev;
1094         old    = neigh->nud_state;
1095         err    = -EPERM;
1096
1097         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1098             (old & (NUD_NOARP | NUD_PERMANENT)))
1099                 goto out;
1100
1101         if (!(new & NUD_VALID)) {
1102                 neigh_del_timer(neigh);
1103                 if (old & NUD_CONNECTED)
1104                         neigh_suspect(neigh);
1105                 neigh->nud_state = new;
1106                 err = 0;
1107                 notify = old & NUD_VALID;
1108                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1109                     (new & NUD_FAILED)) {
1110                         neigh_invalidate(neigh);
1111                         notify = 1;
1112                 }
1113                 goto out;
1114         }
1115
1116         /* Compare new lladdr with cached one */
1117         if (!dev->addr_len) {
1118                 /* First case: device needs no address. */
1119                 lladdr = neigh->ha;
1120         } else if (lladdr) {
1121                 /* The second case: if something is already cached
1122                    and a new address is proposed:
1123                    - compare new & old
1124                    - if they are different, check override flag
1125                  */
1126                 if ((old & NUD_VALID) &&
1127                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1128                         lladdr = neigh->ha;
1129         } else {
1130                 /* No address is supplied; if we know something,
1131                    use it, otherwise discard the request.
1132                  */
1133                 err = -EINVAL;
1134                 if (!(old & NUD_VALID))
1135                         goto out;
1136                 lladdr = neigh->ha;
1137         }
1138
1139         if (new & NUD_CONNECTED)
1140                 neigh->confirmed = jiffies;
1141         neigh->updated = jiffies;
1142
1143         /* If entry was valid and address is not changed,
1144            do not change entry state, if new one is STALE.
1145          */
1146         err = 0;
1147         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1148         if (old & NUD_VALID) {
1149                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1150                         update_isrouter = 0;
1151                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1152                             (old & NUD_CONNECTED)) {
1153                                 lladdr = neigh->ha;
1154                                 new = NUD_STALE;
1155                         } else
1156                                 goto out;
1157                 } else {
1158                         if (lladdr == neigh->ha && new == NUD_STALE &&
1159                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1160                              (old & NUD_CONNECTED))
1161                             )
1162                                 new = old;
1163                 }
1164         }
1165
1166         if (new != old) {
1167                 neigh_del_timer(neigh);
1168                 if (new & NUD_IN_TIMER)
1169                         neigh_add_timer(neigh, (jiffies +
1170                                                 ((new & NUD_REACHABLE) ?
1171                                                  neigh->parms->reachable_time :
1172                                                  0)));
1173                 neigh->nud_state = new;
1174         }
1175
1176         if (lladdr != neigh->ha) {
1177                 write_seqlock(&neigh->ha_lock);
1178                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1179                 write_sequnlock(&neigh->ha_lock);
1180                 neigh_update_hhs(neigh);
1181                 if (!(new & NUD_CONNECTED))
1182                         neigh->confirmed = jiffies -
1183                                       (neigh->parms->base_reachable_time << 1);
1184                 notify = 1;
1185         }
1186         if (new == old)
1187                 goto out;
1188         if (new & NUD_CONNECTED)
1189                 neigh_connect(neigh);
1190         else
1191                 neigh_suspect(neigh);
1192         if (!(old & NUD_VALID)) {
1193                 struct sk_buff *skb;
1194
1195                 /* Again: avoid dead loop if something went wrong */
1196
1197                 while (neigh->nud_state & NUD_VALID &&
1198                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1199                         struct dst_entry *dst = skb_dst(skb);
1200                         struct neighbour *n2, *n1 = neigh;
1201                         write_unlock_bh(&neigh->lock);
1202
1203                         rcu_read_lock();
1204
1205                         /* Why not just use 'neigh' as-is?  The problem is that
1206                          * things such as shaper, eql, and sch_teql can end up
1207                          * using alternative, different, neigh objects to output
1208                          * the packet in the output path.  So what we need to do
1209                          * here is re-lookup the top-level neigh in the path so
1210                          * we can reinject the packet there.
1211                          */
1212                         n2 = NULL;
1213                         if (dst) {
1214                                 n2 = dst_neigh_lookup_skb(dst, skb);
1215                                 if (n2)
1216                                         n1 = n2;
1217                         }
1218                         n1->output(n1, skb);
1219                         if (n2)
1220                                 neigh_release(n2);
1221                         rcu_read_unlock();
1222
1223                         write_lock_bh(&neigh->lock);
1224                 }
1225                 skb_queue_purge(&neigh->arp_queue);
1226                 neigh->arp_queue_len_bytes = 0;
1227         }
1228 out:
1229         if (update_isrouter) {
1230                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1231                         (neigh->flags | NTF_ROUTER) :
1232                         (neigh->flags & ~NTF_ROUTER);
1233         }
1234         write_unlock_bh(&neigh->lock);
1235
1236         if (notify)
1237                 neigh_update_notify(neigh);
1238
1239         return err;
1240 }
1241 EXPORT_SYMBOL(neigh_update);
1242
1243 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1244                                  u8 *lladdr, void *saddr,
1245                                  struct net_device *dev)
1246 {
1247         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1248                                                  lladdr || !dev->addr_len);
1249         if (neigh)
1250                 neigh_update(neigh, lladdr, NUD_STALE,
1251                              NEIGH_UPDATE_F_OVERRIDE);
1252         return neigh;
1253 }
1254 EXPORT_SYMBOL(neigh_event_ns);
1255
1256 /* called with read_lock_bh(&n->lock); */
1257 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1258 {
1259         struct net_device *dev = dst->dev;
1260         __be16 prot = dst->ops->protocol;
1261         struct hh_cache *hh = &n->hh;
1262
1263         write_lock_bh(&n->lock);
1264
1265         /* Only one thread can come in here and initialize the
1266          * hh_cache entry.
1267          */
1268         if (!hh->hh_len)
1269                 dev->header_ops->cache(n, hh, prot);
1270
1271         write_unlock_bh(&n->lock);
1272 }
1273
1274 /* This function can be used in contexts, where only old dev_queue_xmit
1275  * worked, f.e. if you want to override normal output path (eql, shaper),
1276  * but resolution is not made yet.
1277  */
1278
1279 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1280 {
1281         struct net_device *dev = skb->dev;
1282
1283         __skb_pull(skb, skb_network_offset(skb));
1284
1285         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1286                             skb->len) < 0 &&
1287             dev->header_ops->rebuild(skb))
1288                 return 0;
1289
1290         return dev_queue_xmit(skb);
1291 }
1292 EXPORT_SYMBOL(neigh_compat_output);
1293
1294 /* Slow and careful. */
1295
1296 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1297 {
1298         struct dst_entry *dst = skb_dst(skb);
1299         int rc = 0;
1300
1301         if (!dst)
1302                 goto discard;
1303
1304         if (!neigh_event_send(neigh, skb)) {
1305                 int err;
1306                 struct net_device *dev = neigh->dev;
1307                 unsigned int seq;
1308
1309                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1310                         neigh_hh_init(neigh, dst);
1311
1312                 do {
1313                         __skb_pull(skb, skb_network_offset(skb));
1314                         seq = read_seqbegin(&neigh->ha_lock);
1315                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1316                                               neigh->ha, NULL, skb->len);
1317                 } while (read_seqretry(&neigh->ha_lock, seq));
1318
1319                 if (err >= 0)
1320                         rc = dev_queue_xmit(skb);
1321                 else
1322                         goto out_kfree_skb;
1323         }
1324 out:
1325         return rc;
1326 discard:
1327         NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1328                       dst, neigh);
1329 out_kfree_skb:
1330         rc = -EINVAL;
1331         kfree_skb(skb);
1332         goto out;
1333 }
1334 EXPORT_SYMBOL(neigh_resolve_output);
1335
1336 /* As fast as possible without hh cache */
1337
1338 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1339 {
1340         struct net_device *dev = neigh->dev;
1341         unsigned int seq;
1342         int err;
1343
1344         do {
1345                 __skb_pull(skb, skb_network_offset(skb));
1346                 seq = read_seqbegin(&neigh->ha_lock);
1347                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1348                                       neigh->ha, NULL, skb->len);
1349         } while (read_seqretry(&neigh->ha_lock, seq));
1350
1351         if (err >= 0)
1352                 err = dev_queue_xmit(skb);
1353         else {
1354                 err = -EINVAL;
1355                 kfree_skb(skb);
1356         }
1357         return err;
1358 }
1359 EXPORT_SYMBOL(neigh_connected_output);
1360
/* Output routine that performs no neighbour handling at all: @neigh is
 * not looked at and @skb goes straight to dev_queue_xmit().
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1366
1367 static void neigh_proxy_process(unsigned long arg)
1368 {
1369         struct neigh_table *tbl = (struct neigh_table *)arg;
1370         long sched_next = 0;
1371         unsigned long now = jiffies;
1372         struct sk_buff *skb, *n;
1373
1374         spin_lock(&tbl->proxy_queue.lock);
1375
1376         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1377                 long tdif = NEIGH_CB(skb)->sched_next - now;
1378
1379                 if (tdif <= 0) {
1380                         struct net_device *dev = skb->dev;
1381
1382                         __skb_unlink(skb, &tbl->proxy_queue);
1383                         if (tbl->proxy_redo && netif_running(dev)) {
1384                                 rcu_read_lock();
1385                                 tbl->proxy_redo(skb);
1386                                 rcu_read_unlock();
1387                         } else {
1388                                 kfree_skb(skb);
1389                         }
1390
1391                         dev_put(dev);
1392                 } else if (!sched_next || tdif < sched_next)
1393                         sched_next = tdif;
1394         }
1395         del_timer(&tbl->proxy_timer);
1396         if (sched_next)
1397                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1398         spin_unlock(&tbl->proxy_queue.lock);
1399 }
1400
1401 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1402                     struct sk_buff *skb)
1403 {
1404         unsigned long now = jiffies;
1405         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1406
1407         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1408                 kfree_skb(skb);
1409                 return;
1410         }
1411
1412         NEIGH_CB(skb)->sched_next = sched_next;
1413         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1414
1415         spin_lock(&tbl->proxy_queue.lock);
1416         if (del_timer(&tbl->proxy_timer)) {
1417                 if (time_before(tbl->proxy_timer.expires, sched_next))
1418                         sched_next = tbl->proxy_timer.expires;
1419         }
1420         skb_dst_drop(skb);
1421         dev_hold(skb->dev);
1422         __skb_queue_tail(&tbl->proxy_queue, skb);
1423         mod_timer(&tbl->proxy_timer, sched_next);
1424         spin_unlock(&tbl->proxy_queue.lock);
1425 }
1426 EXPORT_SYMBOL(pneigh_enqueue);
1427
1428 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1429                                                       struct net *net, int ifindex)
1430 {
1431         struct neigh_parms *p;
1432
1433         for (p = &tbl->parms; p; p = p->next) {
1434                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1435                     (!p->dev && !ifindex))
1436                         return p;
1437         }
1438
1439         return NULL;
1440 }
1441
1442 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1443                                       struct neigh_table *tbl)
1444 {
1445         struct neigh_parms *p, *ref;
1446         struct net *net = dev_net(dev);
1447         const struct net_device_ops *ops = dev->netdev_ops;
1448
1449         ref = lookup_neigh_parms(tbl, net, 0);
1450         if (!ref)
1451                 return NULL;
1452
1453         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1454         if (p) {
1455                 p->tbl            = tbl;
1456                 atomic_set(&p->refcnt, 1);
1457                 p->reachable_time =
1458                                 neigh_rand_reach_time(p->base_reachable_time);
1459
1460                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1461                         kfree(p);
1462                         return NULL;
1463                 }
1464
1465                 dev_hold(dev);
1466                 p->dev = dev;
1467                 write_pnet(&p->net, hold_net(net));
1468                 p->sysctl_table = NULL;
1469                 write_lock_bh(&tbl->lock);
1470                 p->next         = tbl->parms.next;
1471                 tbl->parms.next = p;
1472                 write_unlock_bh(&tbl->lock);
1473         }
1474         return p;
1475 }
1476 EXPORT_SYMBOL(neigh_parms_alloc);
1477
1478 static void neigh_rcu_free_parms(struct rcu_head *head)
1479 {
1480         struct neigh_parms *parms =
1481                 container_of(head, struct neigh_parms, rcu_head);
1482
1483         neigh_parms_put(parms);
1484 }
1485
1486 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1487 {
1488         struct neigh_parms **p;
1489
1490         if (!parms || parms == &tbl->parms)
1491                 return;
1492         write_lock_bh(&tbl->lock);
1493         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1494                 if (*p == parms) {
1495                         *p = parms->next;
1496                         parms->dead = 1;
1497                         write_unlock_bh(&tbl->lock);
1498                         if (parms->dev)
1499                                 dev_put(parms->dev);
1500                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1501                         return;
1502                 }
1503         }
1504         write_unlock_bh(&tbl->lock);
1505         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1506 }
1507 EXPORT_SYMBOL(neigh_parms_release);
1508
/* Release the namespace reference of @parms, then free it. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	struct net *net = neigh_parms_net(parms);

	release_net(net);
	kfree(parms);
}
1514
1515 static struct lock_class_key neigh_table_proxy_queue_class;
1516
1517 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1518 {
1519         unsigned long now = jiffies;
1520         unsigned long phsize;
1521
1522         write_pnet(&tbl->parms.net, &init_net);
1523         atomic_set(&tbl->parms.refcnt, 1);
1524         tbl->parms.reachable_time =
1525                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1526
1527         tbl->stats = alloc_percpu(struct neigh_statistics);
1528         if (!tbl->stats)
1529                 panic("cannot create neighbour cache statistics");
1530
1531 #ifdef CONFIG_PROC_FS
1532         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1533                               &neigh_stat_seq_fops, tbl))
1534                 panic("cannot create neighbour proc dir entry");
1535 #endif
1536
1537         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1538
1539         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1540         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1541
1542         if (!tbl->nht || !tbl->phash_buckets)
1543                 panic("cannot allocate neighbour cache hashes");
1544
1545         rwlock_init(&tbl->lock);
1546         INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1547         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1548         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1549         skb_queue_head_init_class(&tbl->proxy_queue,
1550                         &neigh_table_proxy_queue_class);
1551
1552         tbl->last_flush = now;
1553         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1554 }
1555
1556 void neigh_table_init(struct neigh_table *tbl)
1557 {
1558         struct neigh_table *tmp;
1559
1560         neigh_table_init_no_netlink(tbl);
1561         write_lock(&neigh_tbl_lock);
1562         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1563                 if (tmp->family == tbl->family)
1564                         break;
1565         }
1566         tbl->next       = neigh_tables;
1567         neigh_tables    = tbl;
1568         write_unlock(&neigh_tbl_lock);
1569
1570         if (unlikely(tmp)) {
1571                 pr_err("Registering multiple tables for family %d\n",
1572                        tbl->family);
1573                 dump_stack();
1574         }
1575 }
1576 EXPORT_SYMBOL(neigh_table_init);
1577
1578 int neigh_table_clear(struct neigh_table *tbl)
1579 {
1580         struct neigh_table **tp;
1581
1582         /* It is not clean... Fix it to unload IPv6 module safely */
1583         cancel_delayed_work_sync(&tbl->gc_work);
1584         del_timer_sync(&tbl->proxy_timer);
1585         pneigh_queue_purge(&tbl->proxy_queue);
1586         neigh_ifdown(tbl, NULL);
1587         if (atomic_read(&tbl->entries))
1588                 pr_crit("neighbour leakage\n");
1589         write_lock(&neigh_tbl_lock);
1590         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1591                 if (*tp == tbl) {
1592                         *tp = tbl->next;
1593                         break;
1594                 }
1595         }
1596         write_unlock(&neigh_tbl_lock);
1597
1598         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1599                  neigh_hash_free_rcu);
1600         tbl->nht = NULL;
1601
1602         kfree(tbl->phash_buckets);
1603         tbl->phash_buckets = NULL;
1604
1605         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1606
1607         free_percpu(tbl->stats);
1608         tbl->stats = NULL;
1609
1610         return 0;
1611 }
1612 EXPORT_SYMBOL(neigh_table_clear);
1613
1614 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1615 {
1616         struct net *net = sock_net(skb->sk);
1617         struct ndmsg *ndm;
1618         struct nlattr *dst_attr;
1619         struct neigh_table *tbl;
1620         struct net_device *dev = NULL;
1621         int err = -EINVAL;
1622
1623         ASSERT_RTNL();
1624         if (nlmsg_len(nlh) < sizeof(*ndm))
1625                 goto out;
1626
1627         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1628         if (dst_attr == NULL)
1629                 goto out;
1630
1631         ndm = nlmsg_data(nlh);
1632         if (ndm->ndm_ifindex) {
1633                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1634                 if (dev == NULL) {
1635                         err = -ENODEV;
1636                         goto out;
1637                 }
1638         }
1639
1640         read_lock(&neigh_tbl_lock);
1641         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1642                 struct neighbour *neigh;
1643
1644                 if (tbl->family != ndm->ndm_family)
1645                         continue;
1646                 read_unlock(&neigh_tbl_lock);
1647
1648                 if (nla_len(dst_attr) < tbl->key_len)
1649                         goto out;
1650
1651                 if (ndm->ndm_flags & NTF_PROXY) {
1652                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1653                         goto out;
1654                 }
1655
1656                 if (dev == NULL)
1657                         goto out;
1658
1659                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1660                 if (neigh == NULL) {
1661                         err = -ENOENT;
1662                         goto out;
1663                 }
1664
1665                 err = neigh_update(neigh, NULL, NUD_FAILED,
1666                                    NEIGH_UPDATE_F_OVERRIDE |
1667                                    NEIGH_UPDATE_F_ADMIN);
1668                 neigh_release(neigh);
1669                 goto out;
1670         }
1671         read_unlock(&neigh_tbl_lock);
1672         err = -EAFNOSUPPORT;
1673
1674 out:
1675         return err;
1676 }
1677
1678 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1679 {
1680         struct net *net = sock_net(skb->sk);
1681         struct ndmsg *ndm;
1682         struct nlattr *tb[NDA_MAX+1];
1683         struct neigh_table *tbl;
1684         struct net_device *dev = NULL;
1685         int err;
1686
1687         ASSERT_RTNL();
1688         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1689         if (err < 0)
1690                 goto out;
1691
1692         err = -EINVAL;
1693         if (tb[NDA_DST] == NULL)
1694                 goto out;
1695
1696         ndm = nlmsg_data(nlh);
1697         if (ndm->ndm_ifindex) {
1698                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1699                 if (dev == NULL) {
1700                         err = -ENODEV;
1701                         goto out;
1702                 }
1703
1704                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1705                         goto out;
1706         }
1707
1708         read_lock(&neigh_tbl_lock);
1709         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1710                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1711                 struct neighbour *neigh;
1712                 void *dst, *lladdr;
1713
1714                 if (tbl->family != ndm->ndm_family)
1715                         continue;
1716                 read_unlock(&neigh_tbl_lock);
1717
1718                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1719                         goto out;
1720                 dst = nla_data(tb[NDA_DST]);
1721                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1722
1723                 if (ndm->ndm_flags & NTF_PROXY) {
1724                         struct pneigh_entry *pn;
1725
1726                         err = -ENOBUFS;
1727                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1728                         if (pn) {
1729                                 pn->flags = ndm->ndm_flags;
1730                                 err = 0;
1731                         }
1732                         goto out;
1733                 }
1734
1735                 if (dev == NULL)
1736                         goto out;
1737
1738                 neigh = neigh_lookup(tbl, dst, dev);
1739                 if (neigh == NULL) {
1740                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1741                                 err = -ENOENT;
1742                                 goto out;
1743                         }
1744
1745                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1746                         if (IS_ERR(neigh)) {
1747                                 err = PTR_ERR(neigh);
1748                                 goto out;
1749                         }
1750                 } else {
1751                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1752                                 err = -EEXIST;
1753                                 neigh_release(neigh);
1754                                 goto out;
1755                         }
1756
1757                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1758                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1759                 }
1760
1761                 if (ndm->ndm_flags & NTF_USE) {
1762                         neigh_event_send(neigh, NULL);
1763                         err = 0;
1764                 } else
1765                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1766                 neigh_release(neigh);
1767                 goto out;
1768         }
1769
1770         read_unlock(&neigh_tbl_lock);
1771         err = -EAFNOSUPPORT;
1772 out:
1773         return err;
1774 }
1775
1776 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1777 {
1778         struct nlattr *nest;
1779
1780         nest = nla_nest_start(skb, NDTA_PARMS);
1781         if (nest == NULL)
1782                 return -ENOBUFS;
1783
1784         if ((parms->dev &&
1785              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1786             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1787             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1788             /* approximative value for deprecated QUEUE_LEN (in packets) */
1789             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1790                         DIV_ROUND_UP(parms->queue_len_bytes,
1791                                      SKB_TRUESIZE(ETH_FRAME_LEN))) ||
1792             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1793             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1794             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1795             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1796             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1797             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1798                           parms->base_reachable_time) ||
1799             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1800             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1801                           parms->delay_probe_time) ||
1802             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1803             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1804             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1805             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1806                 goto nla_put_failure;
1807         return nla_nest_end(skb, nest);
1808
1809 nla_put_failure:
1810         nla_nest_cancel(skb, nest);
1811         return -EMSGSIZE;
1812 }
1813
1814 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1815                               u32 pid, u32 seq, int type, int flags)
1816 {
1817         struct nlmsghdr *nlh;
1818         struct ndtmsg *ndtmsg;
1819
1820         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1821         if (nlh == NULL)
1822                 return -EMSGSIZE;
1823
1824         ndtmsg = nlmsg_data(nlh);
1825
1826         read_lock_bh(&tbl->lock);
1827         ndtmsg->ndtm_family = tbl->family;
1828         ndtmsg->ndtm_pad1   = 0;
1829         ndtmsg->ndtm_pad2   = 0;
1830
1831         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1832             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1833             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1834             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1835             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1836                 goto nla_put_failure;
1837         {
1838                 unsigned long now = jiffies;
1839                 unsigned int flush_delta = now - tbl->last_flush;
1840                 unsigned int rand_delta = now - tbl->last_rand;
1841                 struct neigh_hash_table *nht;
1842                 struct ndt_config ndc = {
1843                         .ndtc_key_len           = tbl->key_len,
1844                         .ndtc_entry_size        = tbl->entry_size,
1845                         .ndtc_entries           = atomic_read(&tbl->entries),
1846                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1847                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1848                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1849                 };
1850
1851                 rcu_read_lock_bh();
1852                 nht = rcu_dereference_bh(tbl->nht);
1853                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1854                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1855                 rcu_read_unlock_bh();
1856
1857                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1858                         goto nla_put_failure;
1859         }
1860
1861         {
1862                 int cpu;
1863                 struct ndt_stats ndst;
1864
1865                 memset(&ndst, 0, sizeof(ndst));
1866
1867                 for_each_possible_cpu(cpu) {
1868                         struct neigh_statistics *st;
1869
1870                         st = per_cpu_ptr(tbl->stats, cpu);
1871                         ndst.ndts_allocs                += st->allocs;
1872                         ndst.ndts_destroys              += st->destroys;
1873                         ndst.ndts_hash_grows            += st->hash_grows;
1874                         ndst.ndts_res_failed            += st->res_failed;
1875                         ndst.ndts_lookups               += st->lookups;
1876                         ndst.ndts_hits                  += st->hits;
1877                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1878                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1879                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1880                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1881                 }
1882
1883                 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1884                         goto nla_put_failure;
1885         }
1886
1887         BUG_ON(tbl->parms.dev);
1888         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1889                 goto nla_put_failure;
1890
1891         read_unlock_bh(&tbl->lock);
1892         return nlmsg_end(skb, nlh);
1893
1894 nla_put_failure:
1895         read_unlock_bh(&tbl->lock);
1896         nlmsg_cancel(skb, nlh);
1897         return -EMSGSIZE;
1898 }
1899
1900 static int neightbl_fill_param_info(struct sk_buff *skb,
1901                                     struct neigh_table *tbl,
1902                                     struct neigh_parms *parms,
1903                                     u32 pid, u32 seq, int type,
1904                                     unsigned int flags)
1905 {
1906         struct ndtmsg *ndtmsg;
1907         struct nlmsghdr *nlh;
1908
1909         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1910         if (nlh == NULL)
1911                 return -EMSGSIZE;
1912
1913         ndtmsg = nlmsg_data(nlh);
1914
1915         read_lock_bh(&tbl->lock);
1916         ndtmsg->ndtm_family = tbl->family;
1917         ndtmsg->ndtm_pad1   = 0;
1918         ndtmsg->ndtm_pad2   = 0;
1919
1920         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1921             neightbl_fill_parms(skb, parms) < 0)
1922                 goto errout;
1923
1924         read_unlock_bh(&tbl->lock);
1925         return nlmsg_end(skb, nlh);
1926 errout:
1927         read_unlock_bh(&tbl->lock);
1928         nlmsg_cancel(skb, nlh);
1929         return -EMSGSIZE;
1930 }
1931
1932 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1933         [NDTA_NAME]             = { .type = NLA_STRING },
1934         [NDTA_THRESH1]          = { .type = NLA_U32 },
1935         [NDTA_THRESH2]          = { .type = NLA_U32 },
1936         [NDTA_THRESH3]          = { .type = NLA_U32 },
1937         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1938         [NDTA_PARMS]            = { .type = NLA_NESTED },
1939 };
1940
1941 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1942         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1943         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1944         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1945         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1946         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1947         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1948         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1949         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1950         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1951         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1952         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1953         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1954         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1955 };
1956
1957 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1958 {
1959         struct net *net = sock_net(skb->sk);
1960         struct neigh_table *tbl;
1961         struct ndtmsg *ndtmsg;
1962         struct nlattr *tb[NDTA_MAX+1];
1963         int err;
1964
1965         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1966                           nl_neightbl_policy);
1967         if (err < 0)
1968                 goto errout;
1969
1970         if (tb[NDTA_NAME] == NULL) {
1971                 err = -EINVAL;
1972                 goto errout;
1973         }
1974
1975         ndtmsg = nlmsg_data(nlh);
1976         read_lock(&neigh_tbl_lock);
1977         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1978                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1979                         continue;
1980
1981                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1982                         break;
1983         }
1984
1985         if (tbl == NULL) {
1986                 err = -ENOENT;
1987                 goto errout_locked;
1988         }
1989
1990         /*
1991          * We acquire tbl->lock to be nice to the periodic timers and
1992          * make sure they always see a consistent set of values.
1993          */
1994         write_lock_bh(&tbl->lock);
1995
1996         if (tb[NDTA_PARMS]) {
1997                 struct nlattr *tbp[NDTPA_MAX+1];
1998                 struct neigh_parms *p;
1999                 int i, ifindex = 0;
2000
2001                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2002                                        nl_ntbl_parm_policy);
2003                 if (err < 0)
2004                         goto errout_tbl_lock;
2005
2006                 if (tbp[NDTPA_IFINDEX])
2007                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2008
2009                 p = lookup_neigh_parms(tbl, net, ifindex);
2010                 if (p == NULL) {
2011                         err = -ENOENT;
2012                         goto errout_tbl_lock;
2013                 }
2014
2015                 for (i = 1; i <= NDTPA_MAX; i++) {
2016                         if (tbp[i] == NULL)
2017                                 continue;
2018
2019                         switch (i) {
2020                         case NDTPA_QUEUE_LEN:
2021                                 p->queue_len_bytes = nla_get_u32(tbp[i]) *
2022                                                      SKB_TRUESIZE(ETH_FRAME_LEN);
2023                                 break;
2024                         case NDTPA_QUEUE_LENBYTES:
2025                                 p->queue_len_bytes = nla_get_u32(tbp[i]);
2026                                 break;
2027                         case NDTPA_PROXY_QLEN:
2028                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2029                                 break;
2030                         case NDTPA_APP_PROBES:
2031                                 p->app_probes = nla_get_u32(tbp[i]);
2032                                 break;
2033                         case NDTPA_UCAST_PROBES:
2034                                 p->ucast_probes = nla_get_u32(tbp[i]);
2035                                 break;
2036                         case NDTPA_MCAST_PROBES:
2037                                 p->mcast_probes = nla_get_u32(tbp[i]);
2038                                 break;
2039                         case NDTPA_BASE_REACHABLE_TIME:
2040                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2041                                 break;
2042                         case NDTPA_GC_STALETIME:
2043                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2044                                 break;
2045                         case NDTPA_DELAY_PROBE_TIME:
2046                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2047                                 break;
2048                         case NDTPA_RETRANS_TIME:
2049                                 p->retrans_time = nla_get_msecs(tbp[i]);
2050                                 break;
2051                         case NDTPA_ANYCAST_DELAY:
2052                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2053                                 break;
2054                         case NDTPA_PROXY_DELAY:
2055                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2056                                 break;
2057                         case NDTPA_LOCKTIME:
2058                                 p->locktime = nla_get_msecs(tbp[i]);
2059                                 break;
2060                         }
2061                 }
2062         }
2063
2064         if (tb[NDTA_THRESH1])
2065                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2066
2067         if (tb[NDTA_THRESH2])
2068                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2069
2070         if (tb[NDTA_THRESH3])
2071                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2072
2073         if (tb[NDTA_GC_INTERVAL])
2074                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2075
2076         err = 0;
2077
2078 errout_tbl_lock:
2079         write_unlock_bh(&tbl->lock);
2080 errout_locked:
2081         read_unlock(&neigh_tbl_lock);
2082 errout:
2083         return err;
2084 }
2085
2086 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2087 {
2088         struct net *net = sock_net(skb->sk);
2089         int family, tidx, nidx = 0;
2090         int tbl_skip = cb->args[0];
2091         int neigh_skip = cb->args[1];
2092         struct neigh_table *tbl;
2093
2094         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2095
2096         read_lock(&neigh_tbl_lock);
2097         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2098                 struct neigh_parms *p;
2099
2100                 if (tidx < tbl_skip || (family && tbl->family != family))
2101                         continue;
2102
2103                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2104                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2105                                        NLM_F_MULTI) <= 0)
2106                         break;
2107
2108                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2109                         if (!net_eq(neigh_parms_net(p), net))
2110                                 continue;
2111
2112                         if (nidx < neigh_skip)
2113                                 goto next;
2114
2115                         if (neightbl_fill_param_info(skb, tbl, p,
2116                                                      NETLINK_CB(cb->skb).portid,
2117                                                      cb->nlh->nlmsg_seq,
2118                                                      RTM_NEWNEIGHTBL,
2119                                                      NLM_F_MULTI) <= 0)
2120                                 goto out;
2121                 next:
2122                         nidx++;
2123                 }
2124
2125                 neigh_skip = 0;
2126         }
2127 out:
2128         read_unlock(&neigh_tbl_lock);
2129         cb->args[0] = tidx;
2130         cb->args[1] = nidx;
2131
2132         return skb->len;
2133 }
2134
2135 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2136                            u32 pid, u32 seq, int type, unsigned int flags)
2137 {
2138         unsigned long now = jiffies;
2139         struct nda_cacheinfo ci;
2140         struct nlmsghdr *nlh;
2141         struct ndmsg *ndm;
2142
2143         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2144         if (nlh == NULL)
2145                 return -EMSGSIZE;
2146
2147         ndm = nlmsg_data(nlh);
2148         ndm->ndm_family  = neigh->ops->family;
2149         ndm->ndm_pad1    = 0;
2150         ndm->ndm_pad2    = 0;
2151         ndm->ndm_flags   = neigh->flags;
2152         ndm->ndm_type    = neigh->type;
2153         ndm->ndm_ifindex = neigh->dev->ifindex;
2154
2155         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2156                 goto nla_put_failure;
2157
2158         read_lock_bh(&neigh->lock);
2159         ndm->ndm_state   = neigh->nud_state;
2160         if (neigh->nud_state & NUD_VALID) {
2161                 char haddr[MAX_ADDR_LEN];
2162
2163                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2164                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2165                         read_unlock_bh(&neigh->lock);
2166                         goto nla_put_failure;
2167                 }
2168         }
2169
2170         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2171         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2172         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2173         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2174         read_unlock_bh(&neigh->lock);
2175
2176         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2177             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2178                 goto nla_put_failure;
2179
2180         return nlmsg_end(skb, nlh);
2181
2182 nla_put_failure:
2183         nlmsg_cancel(skb, nlh);
2184         return -EMSGSIZE;
2185 }
2186
2187 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2188                             u32 pid, u32 seq, int type, unsigned int flags,
2189                             struct neigh_table *tbl)
2190 {
2191         struct nlmsghdr *nlh;
2192         struct ndmsg *ndm;
2193
2194         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2195         if (nlh == NULL)
2196                 return -EMSGSIZE;
2197
2198         ndm = nlmsg_data(nlh);
2199         ndm->ndm_family  = tbl->family;
2200         ndm->ndm_pad1    = 0;
2201         ndm->ndm_pad2    = 0;
2202         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2203         ndm->ndm_type    = NDA_DST;
2204         ndm->ndm_ifindex = pn->dev->ifindex;
2205         ndm->ndm_state   = NUD_NONE;
2206
2207         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2208                 goto nla_put_failure;
2209
2210         return nlmsg_end(skb, nlh);
2211
2212 nla_put_failure:
2213         nlmsg_cancel(skb, nlh);
2214         return -EMSGSIZE;
2215 }
2216
2217 static void neigh_update_notify(struct neighbour *neigh)
2218 {
2219         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2220         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2221 }
2222
2223 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2224                             struct netlink_callback *cb)
2225 {
2226         struct net *net = sock_net(skb->sk);
2227         struct neighbour *n;
2228         int rc, h, s_h = cb->args[1];
2229         int idx, s_idx = idx = cb->args[2];
2230         struct neigh_hash_table *nht;
2231
2232         rcu_read_lock_bh();
2233         nht = rcu_dereference_bh(tbl->nht);
2234
2235         for (h = s_h; h < (1 << nht->hash_shift); h++) {
2236                 if (h > s_h)
2237                         s_idx = 0;
2238                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2239                      n != NULL;
2240                      n = rcu_dereference_bh(n->next)) {
2241                         if (!net_eq(dev_net(n->dev), net))
2242                                 continue;
2243                         if (idx < s_idx)
2244                                 goto next;
2245                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2246                                             cb->nlh->nlmsg_seq,
2247                                             RTM_NEWNEIGH,
2248                                             NLM_F_MULTI) <= 0) {
2249                                 rc = -1;
2250                                 goto out;
2251                         }
2252 next:
2253                         idx++;
2254                 }
2255         }
2256         rc = skb->len;
2257 out:
2258         rcu_read_unlock_bh();
2259         cb->args[1] = h;
2260         cb->args[2] = idx;
2261         return rc;
2262 }
2263
2264 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2265                              struct netlink_callback *cb)
2266 {
2267         struct pneigh_entry *n;
2268         struct net *net = sock_net(skb->sk);
2269         int rc, h, s_h = cb->args[3];
2270         int idx, s_idx = idx = cb->args[4];
2271
2272         read_lock_bh(&tbl->lock);
2273
2274         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2275                 if (h > s_h)
2276                         s_idx = 0;
2277                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2278                         if (dev_net(n->dev) != net)
2279                                 continue;
2280                         if (idx < s_idx)
2281                                 goto next;
2282                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2283                                             cb->nlh->nlmsg_seq,
2284                                             RTM_NEWNEIGH,
2285                                             NLM_F_MULTI, tbl) <= 0) {
2286                                 read_unlock_bh(&tbl->lock);
2287                                 rc = -1;
2288                                 goto out;
2289                         }
2290                 next:
2291                         idx++;
2292                 }
2293         }
2294
2295         read_unlock_bh(&tbl->lock);
2296         rc = skb->len;
2297 out:
2298         cb->args[3] = h;
2299         cb->args[4] = idx;
2300         return rc;
2301
2302 }
2303
2304 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2305 {
2306         struct neigh_table *tbl;
2307         int t, family, s_t;
2308         int proxy = 0;
2309         int err;
2310
2311         read_lock(&neigh_tbl_lock);
2312         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2313
2314         /* check for full ndmsg structure presence, family member is
2315          * the same for both structures
2316          */
2317         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2318             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2319                 proxy = 1;
2320
2321         s_t = cb->args[0];
2322
2323         for (tbl = neigh_tables, t = 0; tbl;
2324              tbl = tbl->next, t++) {
2325                 if (t < s_t || (family && tbl->family != family))
2326                         continue;
2327                 if (t > s_t)
2328                         memset(&cb->args[1], 0, sizeof(cb->args) -
2329                                                 sizeof(cb->args[0]));
2330                 if (proxy)
2331                         err = pneigh_dump_table(tbl, skb, cb);
2332                 else
2333                         err = neigh_dump_table(tbl, skb, cb);
2334                 if (err < 0)
2335                         break;
2336         }
2337         read_unlock(&neigh_tbl_lock);
2338
2339         cb->args[0] = t;
2340         return skb->len;
2341 }
2342
2343 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2344 {
2345         int chain;
2346         struct neigh_hash_table *nht;
2347
2348         rcu_read_lock_bh();
2349         nht = rcu_dereference_bh(tbl->nht);
2350
2351         read_lock(&tbl->lock); /* avoid resizes */
2352         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2353                 struct neighbour *n;
2354
2355                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2356                      n != NULL;
2357                      n = rcu_dereference_bh(n->next))
2358                         cb(n, cookie);
2359         }
2360         read_unlock(&tbl->lock);
2361         rcu_read_unlock_bh();
2362 }
2363 EXPORT_SYMBOL(neigh_for_each);
2364
2365 /* The tbl->lock must be held as a writer and BH disabled. */
2366 void __neigh_for_each_release(struct neigh_table *tbl,
2367                               int (*cb)(struct neighbour *))
2368 {
2369         int chain;
2370         struct neigh_hash_table *nht;
2371
2372         nht = rcu_dereference_protected(tbl->nht,
2373                                         lockdep_is_held(&tbl->lock));
2374         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2375                 struct neighbour *n;
2376                 struct neighbour __rcu **np;
2377
2378                 np = &nht->hash_buckets[chain];
2379                 while ((n = rcu_dereference_protected(*np,
2380                                         lockdep_is_held(&tbl->lock))) != NULL) {
2381                         int release;
2382
2383                         write_lock(&n->lock);
2384                         release = cb(n);
2385                         if (release) {
2386                                 rcu_assign_pointer(*np,
2387                                         rcu_dereference_protected(n->next,
2388                                                 lockdep_is_held(&tbl->lock)));
2389                                 n->dead = 1;
2390                         } else
2391                                 np = &n->next;
2392                         write_unlock(&n->lock);
2393                         if (release)
2394                                 neigh_cleanup_and_release(n);
2395                 }
2396         }
2397 }
2398 EXPORT_SYMBOL(__neigh_for_each_release);
2399
2400 #ifdef CONFIG_PROC_FS
2401
2402 static struct neighbour *neigh_get_first(struct seq_file *seq)
2403 {
2404         struct neigh_seq_state *state = seq->private;
2405         struct net *net = seq_file_net(seq);
2406         struct neigh_hash_table *nht = state->nht;
2407         struct neighbour *n = NULL;
2408         int bucket = state->bucket;
2409
2410         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2411         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2412                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2413
2414                 while (n) {
2415                         if (!net_eq(dev_net(n->dev), net))
2416                                 goto next;
2417                         if (state->neigh_sub_iter) {
2418                                 loff_t fakep = 0;
2419                                 void *v;
2420
2421                                 v = state->neigh_sub_iter(state, n, &fakep);
2422                                 if (!v)
2423                                         goto next;
2424                         }
2425                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2426                                 break;
2427                         if (n->nud_state & ~NUD_NOARP)
2428                                 break;
2429 next:
2430                         n = rcu_dereference_bh(n->next);
2431                 }
2432
2433                 if (n)
2434                         break;
2435         }
2436         state->bucket = bucket;
2437
2438         return n;
2439 }
2440
2441 static struct neighbour *neigh_get_next(struct seq_file *seq,
2442                                         struct neighbour *n,
2443                                         loff_t *pos)
2444 {
2445         struct neigh_seq_state *state = seq->private;
2446         struct net *net = seq_file_net(seq);
2447         struct neigh_hash_table *nht = state->nht;
2448
2449         if (state->neigh_sub_iter) {
2450                 void *v = state->neigh_sub_iter(state, n, pos);
2451                 if (v)
2452                         return n;
2453         }
2454         n = rcu_dereference_bh(n->next);
2455
2456         while (1) {
2457                 while (n) {
2458                         if (!net_eq(dev_net(n->dev), net))
2459                                 goto next;
2460                         if (state->neigh_sub_iter) {
2461                                 void *v = state->neigh_sub_iter(state, n, pos);
2462                                 if (v)
2463                                         return n;
2464                                 goto next;
2465                         }
2466                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2467                                 break;
2468
2469                         if (n->nud_state & ~NUD_NOARP)
2470                                 break;
2471 next:
2472                         n = rcu_dereference_bh(n->next);
2473                 }
2474
2475                 if (n)
2476                         break;
2477
2478                 if (++state->bucket >= (1 << nht->hash_shift))
2479                         break;
2480
2481                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2482         }
2483
2484         if (n && pos)
2485                 --(*pos);
2486         return n;
2487 }
2488
2489 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2490 {
2491         struct neighbour *n = neigh_get_first(seq);
2492
2493         if (n) {
2494                 --(*pos);
2495                 while (*pos) {
2496                         n = neigh_get_next(seq, n, pos);
2497                         if (!n)
2498                                 break;
2499                 }
2500         }
2501         return *pos ? NULL : n;
2502 }
2503
2504 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2505 {
2506         struct neigh_seq_state *state = seq->private;
2507         struct net *net = seq_file_net(seq);
2508         struct neigh_table *tbl = state->tbl;
2509         struct pneigh_entry *pn = NULL;
2510         int bucket = state->bucket;
2511
2512         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2513         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2514                 pn = tbl->phash_buckets[bucket];
2515                 while (pn && !net_eq(pneigh_net(pn), net))
2516                         pn = pn->next;
2517                 if (pn)
2518                         break;
2519         }
2520         state->bucket = bucket;
2521
2522         return pn;
2523 }
2524
2525 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2526                                             struct pneigh_entry *pn,
2527                                             loff_t *pos)
2528 {
2529         struct neigh_seq_state *state = seq->private;
2530         struct net *net = seq_file_net(seq);
2531         struct neigh_table *tbl = state->tbl;
2532
2533         do {
2534                 pn = pn->next;
2535         } while (pn && !net_eq(pneigh_net(pn), net));
2536
2537         while (!pn) {
2538                 if (++state->bucket > PNEIGH_HASHMASK)
2539                         break;
2540                 pn = tbl->phash_buckets[state->bucket];
2541                 while (pn && !net_eq(pneigh_net(pn), net))
2542                         pn = pn->next;
2543                 if (pn)
2544                         break;
2545         }
2546
2547         if (pn && pos)
2548                 --(*pos);
2549
2550         return pn;
2551 }
2552
2553 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2554 {
2555         struct pneigh_entry *pn = pneigh_get_first(seq);
2556
2557         if (pn) {
2558                 --(*pos);
2559                 while (*pos) {
2560                         pn = pneigh_get_next(seq, pn, pos);
2561                         if (!pn)
2562                                 break;
2563                 }
2564         }
2565         return *pos ? NULL : pn;
2566 }
2567
2568 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2569 {
2570         struct neigh_seq_state *state = seq->private;
2571         void *rc;
2572         loff_t idxpos = *pos;
2573
2574         rc = neigh_get_idx(seq, &idxpos);
2575         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2576                 rc = pneigh_get_idx(seq, &idxpos);
2577
2578         return rc;
2579 }
2580
2581 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2582         __acquires(rcu_bh)
2583 {
2584         struct neigh_seq_state *state = seq->private;
2585
2586         state->tbl = tbl;
2587         state->bucket = 0;
2588         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2589
2590         rcu_read_lock_bh();
2591         state->nht = rcu_dereference_bh(tbl->nht);
2592
2593         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2594 }
2595 EXPORT_SYMBOL(neigh_seq_start);
2596
2597 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2598 {
2599         struct neigh_seq_state *state;
2600         void *rc;
2601
2602         if (v == SEQ_START_TOKEN) {
2603                 rc = neigh_get_first(seq);
2604                 goto out;
2605         }
2606
2607         state = seq->private;
2608         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2609                 rc = neigh_get_next(seq, v, NULL);
2610                 if (rc)
2611                         goto out;
2612                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2613                         rc = pneigh_get_first(seq);
2614         } else {
2615                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2616                 rc = pneigh_get_next(seq, v, NULL);
2617         }
2618 out:
2619         ++(*pos);
2620         return rc;
2621 }
2622 EXPORT_SYMBOL(neigh_seq_next);
2623
2624 void neigh_seq_stop(struct seq_file *seq, void *v)
2625         __releases(rcu_bh)
2626 {
2627         rcu_read_unlock_bh();
2628 }
2629 EXPORT_SYMBOL(neigh_seq_stop);
2630
2631 /* statistics via seq_file */
2632
2633 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2634 {
2635         struct neigh_table *tbl = seq->private;
2636         int cpu;
2637
2638         if (*pos == 0)
2639                 return SEQ_START_TOKEN;
2640
2641         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2642                 if (!cpu_possible(cpu))
2643                         continue;
2644                 *pos = cpu+1;
2645                 return per_cpu_ptr(tbl->stats, cpu);
2646         }
2647         return NULL;
2648 }
2649
2650 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2651 {
2652         struct neigh_table *tbl = seq->private;
2653         int cpu;
2654
2655         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2656                 if (!cpu_possible(cpu))
2657                         continue;
2658                 *pos = cpu+1;
2659                 return per_cpu_ptr(tbl->stats, cpu);
2660         }
2661         return NULL;
2662 }
2663
/*
 * ->stop for the per-CPU statistics file.  The start/next callbacks of this
 * iterator take no lock and allocate nothing, so this is a no-op.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2668
2669 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2670 {
2671         struct neigh_table *tbl = seq->private;
2672         struct neigh_statistics *st = v;
2673
2674         if (v == SEQ_START_TOKEN) {
2675                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2676                 return 0;
2677         }
2678
2679         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2680                         "%08lx %08lx  %08lx %08lx %08lx\n",
2681                    atomic_read(&tbl->entries),
2682
2683                    st->allocs,
2684                    st->destroys,
2685                    st->hash_grows,
2686
2687                    st->lookups,
2688                    st->hits,
2689
2690                    st->res_failed,
2691
2692                    st->rcv_probes_mcast,
2693                    st->rcv_probes_ucast,
2694
2695                    st->periodic_gc_runs,
2696                    st->forced_gc_runs,
2697                    st->unres_discards
2698                    );
2699
2700         return 0;
2701 }
2702
2703 static const struct seq_operations neigh_stat_seq_ops = {
2704         .start  = neigh_stat_seq_start,
2705         .next   = neigh_stat_seq_next,
2706         .stop   = neigh_stat_seq_stop,
2707         .show   = neigh_stat_seq_show,
2708 };
2709
2710 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2711 {
2712         int ret = seq_open(file, &neigh_stat_seq_ops);
2713
2714         if (!ret) {
2715                 struct seq_file *sf = file->private_data;
2716                 sf->private = PDE(inode)->data;
2717         }
2718         return ret;
2719 };
2720
2721 static const struct file_operations neigh_stat_seq_fops = {
2722         .owner   = THIS_MODULE,
2723         .open    = neigh_stat_seq_open,
2724         .read    = seq_read,
2725         .llseek  = seq_lseek,
2726         .release = seq_release,
2727 };
2728
2729 #endif /* CONFIG_PROC_FS */
2730
2731 static inline size_t neigh_nlmsg_size(void)
2732 {
2733         return NLMSG_ALIGN(sizeof(struct ndmsg))
2734                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2735                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2736                + nla_total_size(sizeof(struct nda_cacheinfo))
2737                + nla_total_size(4); /* NDA_PROBES */
2738 }
2739
2740 static void __neigh_notify(struct neighbour *n, int type, int flags)
2741 {
2742         struct net *net = dev_net(n->dev);
2743         struct sk_buff *skb;
2744         int err = -ENOBUFS;
2745
2746         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2747         if (skb == NULL)
2748                 goto errout;
2749
2750         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2751         if (err < 0) {
2752                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2753                 WARN_ON(err == -EMSGSIZE);
2754                 kfree_skb(skb);
2755                 goto errout;
2756         }
2757         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2758         return;
2759 errout:
2760         if (err < 0)
2761                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2762 }
2763
#ifdef CONFIG_ARPD
/*
 * Emit an RTM_GETNEIGH message flagged NLM_F_REQUEST for @n on the
 * neighbour netlink group.  Only built when CONFIG_ARPD is enabled.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2771
2772 #ifdef CONFIG_SYSCTL
2773
2774 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2775                            size_t *lenp, loff_t *ppos)
2776 {
2777         int size, ret;
2778         ctl_table tmp = *ctl;
2779
2780         tmp.data = &size;
2781         size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2782         ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2783         if (write && !ret)
2784                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2785         return ret;
2786 }
2787
/*
 * Indices into neigh_sysctl_table.neigh_vars.  The order is significant:
 * neigh_sysctl_register() cuts the table off at NEIGH_VAR_GC_INTERVAL for
 * per-device tables, so everything from there on exists only in the
 * "default" table.
 */
enum {
	NEIGH_VAR_MCAST_PROBE = 0,		/* mcast_solicit */
	NEIGH_VAR_UCAST_PROBE,			/* ucast_solicit */
	NEIGH_VAR_APP_PROBE,			/* app_solicit */
	NEIGH_VAR_RETRANS_TIME,			/* retrans_time */
	NEIGH_VAR_BASE_REACHABLE_TIME,		/* base_reachable_time */
	NEIGH_VAR_DELAY_PROBE_TIME,		/* delay_first_probe_time */
	NEIGH_VAR_GC_STALETIME,			/* gc_stale_time */
	NEIGH_VAR_QUEUE_LEN,			/* unres_qlen */
	NEIGH_VAR_QUEUE_LEN_BYTES,		/* unres_qlen_bytes */
	NEIGH_VAR_PROXY_QLEN,			/* proxy_qlen */
	NEIGH_VAR_ANYCAST_DELAY,		/* anycast_delay */
	NEIGH_VAR_PROXY_DELAY,			/* proxy_delay */
	NEIGH_VAR_LOCKTIME,			/* locktime */
	NEIGH_VAR_RETRANS_TIME_MS,		/* retrans_time_ms */
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,	/* base_reachable_time_ms */
	NEIGH_VAR_GC_INTERVAL,			/* gc_interval */
	NEIGH_VAR_GC_THRESH1,			/* gc_thresh1 */
	NEIGH_VAR_GC_THRESH2,			/* gc_thresh2 */
	NEIGH_VAR_GC_THRESH3,			/* gc_thresh3 */
	NEIGH_VAR_MAX				/* number of variables */
};
2810
2811 static struct neigh_sysctl_table {
2812         struct ctl_table_header *sysctl_header;
2813         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2814 } neigh_sysctl_template __read_mostly = {
2815         .neigh_vars = {
2816                 [NEIGH_VAR_MCAST_PROBE] = {
2817                         .procname       = "mcast_solicit",
2818                         .maxlen         = sizeof(int),
2819                         .mode           = 0644,
2820                         .proc_handler   = proc_dointvec,
2821                 },
2822                 [NEIGH_VAR_UCAST_PROBE] = {
2823                         .procname       = "ucast_solicit",
2824                         .maxlen         = sizeof(int),
2825                         .mode           = 0644,
2826                         .proc_handler   = proc_dointvec,
2827                 },
2828                 [NEIGH_VAR_APP_PROBE] = {
2829                         .procname       = "app_solicit",
2830                         .maxlen         = sizeof(int),
2831                         .mode           = 0644,
2832                         .proc_handler   = proc_dointvec,
2833                 },
2834                 [NEIGH_VAR_RETRANS_TIME] = {
2835                         .procname       = "retrans_time",
2836                         .maxlen         = sizeof(int),
2837                         .mode           = 0644,
2838                         .proc_handler   = proc_dointvec_userhz_jiffies,
2839                 },
2840                 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2841                         .procname       = "base_reachable_time",
2842                         .maxlen         = sizeof(int),
2843                         .mode           = 0644,
2844                         .proc_handler   = proc_dointvec_jiffies,
2845                 },
2846                 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2847                         .procname       = "delay_first_probe_time",
2848                         .maxlen         = sizeof(int),
2849                         .mode           = 0644,
2850                         .proc_handler   = proc_dointvec_jiffies,
2851                 },
2852                 [NEIGH_VAR_GC_STALETIME] = {
2853                         .procname       = "gc_stale_time",
2854                         .maxlen         = sizeof(int),
2855                         .mode           = 0644,
2856                         .proc_handler   = proc_dointvec_jiffies,
2857                 },
2858                 [NEIGH_VAR_QUEUE_LEN] = {
2859                         .procname       = "unres_qlen",
2860                         .maxlen         = sizeof(int),
2861                         .mode           = 0644,
2862                         .proc_handler   = proc_unres_qlen,
2863                 },
2864                 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2865                         .procname       = "unres_qlen_bytes",
2866                         .maxlen         = sizeof(int),
2867                         .mode           = 0644,
2868                         .proc_handler   = proc_dointvec,
2869                 },
2870                 [NEIGH_VAR_PROXY_QLEN] = {
2871                         .procname       = "proxy_qlen",
2872                         .maxlen         = sizeof(int),
2873                         .mode           = 0644,
2874                         .proc_handler   = proc_dointvec,
2875                 },
2876                 [NEIGH_VAR_ANYCAST_DELAY] = {
2877                         .procname       = "anycast_delay",
2878                         .maxlen         = sizeof(int),
2879                         .mode           = 0644,
2880                         .proc_handler   = proc_dointvec_userhz_jiffies,
2881                 },
2882                 [NEIGH_VAR_PROXY_DELAY] = {
2883                         .procname       = "proxy_delay",
2884                         .maxlen         = sizeof(int),
2885                         .mode           = 0644,
2886                         .proc_handler   = proc_dointvec_userhz_jiffies,
2887                 },
2888                 [NEIGH_VAR_LOCKTIME] = {
2889                         .procname       = "locktime",
2890                         .maxlen         = sizeof(int),
2891                         .mode           = 0644,
2892                         .proc_handler   = proc_dointvec_userhz_jiffies,
2893                 },
2894                 [NEIGH_VAR_RETRANS_TIME_MS] = {
2895                         .procname       = "retrans_time_ms",
2896                         .maxlen         = sizeof(int),
2897                         .mode           = 0644,
2898                         .proc_handler   = proc_dointvec_ms_jiffies,
2899                 },
2900                 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2901                         .procname       = "base_reachable_time_ms",
2902                         .maxlen         = sizeof(int),
2903                         .mode           = 0644,
2904                         .proc_handler   = proc_dointvec_ms_jiffies,
2905                 },
2906                 [NEIGH_VAR_GC_INTERVAL] = {
2907                         .procname       = "gc_interval",
2908                         .maxlen         = sizeof(int),
2909                         .mode           = 0644,
2910                         .proc_handler   = proc_dointvec_jiffies,
2911                 },
2912                 [NEIGH_VAR_GC_THRESH1] = {
2913                         .procname       = "gc_thresh1",
2914                         .maxlen         = sizeof(int),
2915                         .mode           = 0644,
2916                         .proc_handler   = proc_dointvec,
2917                 },
2918                 [NEIGH_VAR_GC_THRESH2] = {
2919                         .procname       = "gc_thresh2",
2920                         .maxlen         = sizeof(int),
2921                         .mode           = 0644,
2922                         .proc_handler   = proc_dointvec,
2923                 },
2924                 [NEIGH_VAR_GC_THRESH3] = {
2925                         .procname       = "gc_thresh3",
2926                         .maxlen         = sizeof(int),
2927                         .mode           = 0644,
2928                         .proc_handler   = proc_dointvec,
2929                 },
2930                 {},
2931         },
2932 };
2933
2934 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2935                           char *p_name, proc_handler *handler)
2936 {
2937         struct neigh_sysctl_table *t;
2938         const char *dev_name_source = NULL;
2939         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
2940
2941         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2942         if (!t)
2943                 goto err;
2944
2945         t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2946         t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2947         t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2948         t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2949         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2950         t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2951         t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2952         t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2953         t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2954         t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2955         t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2956         t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2957         t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2958         t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2959         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2960
2961         if (dev) {
2962                 dev_name_source = dev->name;
2963                 /* Terminate the table early */
2964                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2965                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2966         } else {
2967                 dev_name_source = "default";
2968                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2969                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2970                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2971                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2972         }
2973
2974
2975         if (handler) {
2976                 /* RetransTime */
2977                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2978                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2979                 /* ReachableTime */
2980                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2981                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2982                 /* RetransTime (in milliseconds)*/
2983                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2984                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2985                 /* ReachableTime (in milliseconds) */
2986                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2987                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2988         }
2989
2990         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
2991                 p_name, dev_name_source);
2992         t->sysctl_header =
2993                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
2994         if (!t->sysctl_header)
2995                 goto free;
2996
2997         p->sysctl_table = t;
2998         return 0;
2999
3000 free:
3001         kfree(t);
3002 err:
3003         return -ENOBUFS;
3004 }
3005 EXPORT_SYMBOL(neigh_sysctl_register);
3006
3007 void neigh_sysctl_unregister(struct neigh_parms *p)
3008 {
3009         if (p->sysctl_table) {
3010                 struct neigh_sysctl_table *t = p->sysctl_table;
3011                 p->sysctl_table = NULL;
3012                 unregister_net_sysctl_table(t->sysctl_header);
3013                 kfree(t);
3014         }
3015 }
3016 EXPORT_SYMBOL(neigh_sysctl_unregister);
3017
3018 #endif  /* CONFIG_SYSCTL */
3019
3020 static int __init neigh_init(void)
3021 {
3022         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3023         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3024         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3025
3026         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3027                       NULL);
3028         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3029
3030         return 0;
3031 }
3032
3033 subsys_initcall(neigh_init);
3034