upload tizen1.0 source
[kernel/linux-2.6.36.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65
/*
 * Statically initialised IPv4 device configuration block.  Slots of .data
 * are addressed as IPV4_DEVCONF_<NAME> - 1.  The four options listed below
 * start out as 1; every slot not listed is zero-initialised.
 * NOTE(review): presumably the "all" configuration of the initial network
 * namespace -- its users are outside this part of the file, confirm.
 */
66 static struct ipv4_devconf ipv4_devconf = {
67         .data = {
68                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72         },
73 };
74
/*
 * Statically initialised "default" IPv4 device configuration block.  It
 * enables the same four options as ipv4_devconf and additionally
 * ACCEPT_SOURCE_ROUTE; all other slots are zero-initialised.
 * NOTE(review): inetdev_init() copies net->ipv4.devconf_dflt into every new
 * in_device; presumably that pointer refers to this object for the initial
 * namespace -- the assignment is not in this part of the file, confirm.
 */
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76         .data = {
77                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82         },
83 };
84
/*
 * Read option @attr from the default device configuration of network
 * namespace @net.  @net is parenthesised so that any pointer-valued
 * expression (not just a plain identifier) can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87
/*
 * Netlink attribute policy for IPv4 address messages; handed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr().  The three
 * address attributes are 32-bit values, the label is a string of at most
 * IFNAMSIZ - 1 bytes.
 */
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89         [IFA_LOCAL]             = { .type = NLA_U32 },
90         [IFA_ADDRESS]           = { .type = NLA_U32 },
91         [IFA_BROADCAST]         = { .type = NLA_U32 },
92         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94
/*
 * Forward declaration of the helper that emits RTM_NEWADDR / RTM_DELADDR
 * messages; its definition is not in this part of the file.
 */
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96
/*
 * Notifier chain called with NETDEV_UP when an address is inserted and with
 * NETDEV_DOWN when one is deleted; the in_ifaddr is passed as the payload.
 */
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration: inetdev_destroy() uses this before its definition. */
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99                          int destroy);
/*
 * Per-device sysctl hooks.  With CONFIG_SYSCTL they are real functions
 * (only declared here); without it they collapse to empty inline stubs so
 * that inetdev_init() and inetdev_destroy() can call them unconditionally.
 */
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
103 #else
104 static inline void devinet_sysctl_register(struct in_device *idev)
105 {
106 }
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
108 {
109 }
110 #endif
111
112 /* Locks all the inet devices. */
113
114 static struct in_ifaddr *inet_alloc_ifa(void)
115 {
116         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117 }
118
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122         if (ifa->ifa_dev)
123                 in_dev_put(ifa->ifa_dev);
124         kfree(ifa);
125 }
126
/*
 * Release an in_ifaddr.  The actual free is deferred through call_rcu(),
 * which runs inet_rcu_free_ifa() on the embedded rcu_head.
 */
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134         struct net_device *dev = idev->dev;
135
136         WARN_ON(idev->ifa_list);
137         WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140                idev, dev ? dev->name : "NIL");
141 #endif
142         dev_put(dev);
143         if (!idev->dead)
144                 pr_err("Freeing alive in_device %p\n", idev);
145         else
146                 kfree(idev);
147 }
148 EXPORT_SYMBOL(in_dev_finish_destroy);
149
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152         struct in_device *in_dev;
153
154         ASSERT_RTNL();
155
156         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157         if (!in_dev)
158                 goto out;
159         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160                         sizeof(in_dev->cnf));
161         in_dev->cnf.sysctl = NULL;
162         in_dev->dev = dev;
163         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164         if (!in_dev->arp_parms)
165                 goto out_kfree;
166         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167                 dev_disable_lro(dev);
168         /* Reference in_dev->dev */
169         dev_hold(dev);
170         /* Account for reference dev->ip_ptr (below) */
171         in_dev_hold(in_dev);
172
173         devinet_sysctl_register(in_dev);
174         ip_mc_init_dev(in_dev);
175         if (dev->flags & IFF_UP)
176                 ip_mc_up(in_dev);
177
178         /* we can receive as soon as ip_ptr is set -- do this last */
179         rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181         return in_dev;
182 out_kfree:
183         kfree(in_dev);
184         in_dev = NULL;
185         goto out;
186 }
187
188 static void in_dev_rcu_put(struct rcu_head *head)
189 {
190         struct in_device *idev = container_of(head, struct in_device, rcu_head);
191         in_dev_put(idev);
192 }
193
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196         struct in_ifaddr *ifa;
197         struct net_device *dev;
198
199         ASSERT_RTNL();
200
201         dev = in_dev->dev;
202
203         in_dev->dead = 1;
204
205         ip_mc_destroy_dev(in_dev);
206
207         while ((ifa = in_dev->ifa_list) != NULL) {
208                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209                 inet_free_ifa(ifa);
210         }
211
212         dev->ip_ptr = NULL;
213
214         devinet_sysctl_unregister(in_dev);
215         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216         arp_ifdown(dev);
217
218         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219 }
220
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222 {
223         rcu_read_lock();
224         for_primary_ifa(in_dev) {
225                 if (inet_ifa_match(a, ifa)) {
226                         if (!b || inet_ifa_match(b, ifa)) {
227                                 rcu_read_unlock();
228                                 return 1;
229                         }
230                 }
231         } endfor_ifa(in_dev);
232         rcu_read_unlock();
233         return 0;
234 }
235
/*
 * Remove the address *@ifap from @in_dev's address list.  RTNL must be held.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (list head or a predecessor's ifa_next) that points at the
 *           address to remove
 * @destroy: when non-zero the removed address is also freed
 * @nlh/@pid: forwarded unchanged to rtmsg_ifa() for every notification
 *
 * When the removed address is a primary, the secondaries that follow it and
 * share its mask and subnet are either deleted as well (promotion disabled)
 * or the first of them is promoted to primary (promotion enabled).  Every
 * removal is announced with RTM_DELADDR followed by a NETDEV_DOWN notifier
 * call; a promotion is announced with RTM_NEWADDR followed by NETDEV_UP.
 */
236 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237                          int destroy, struct nlmsghdr *nlh, u32 pid)
238 {
239         struct in_ifaddr *promote = NULL;
240         struct in_ifaddr *ifa, *ifa1 = *ifap;
241         struct in_ifaddr *last_prim = in_dev->ifa_list;
242         struct in_ifaddr *prev_prom = NULL;
243         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
244
245         ASSERT_RTNL();
246
247         /* 1. Deleting primary ifaddr forces deletion all secondaries
248          * unless alias promotion is set
249          **/
250
251         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
253
254                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary whose scope is not
                         * narrower than ifa1's; a promoted address is
                         * re-linked right behind it further down. */
255                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256                             ifa1->ifa_scope <= ifa->ifa_scope)
257                                 last_prim = ifa;
258
                        /* Skip entries that are not secondaries of ifa1's
                         * subnet, remembering the last one skipped. */
259                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260                             ifa1->ifa_mask != ifa->ifa_mask ||
261                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
262                                 ifap1 = &ifa->ifa_next;
263                                 prev_prom = ifa;
264                                 continue;
265                         }
266
267                         if (!do_promote) {
                                /* No promotion: unlink, announce and free
                                 * this secondary, then keep scanning. */
268                                 *ifap1 = ifa->ifa_next;
269
270                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271                                 blocking_notifier_call_chain(&inetaddr_chain,
272                                                 NETDEV_DOWN, ifa);
273                                 inet_free_ifa(ifa);
274                         } else {
                                /* Promotion: the first matching secondary
                                 * is the candidate; stop scanning. */
275                                 promote = ifa;
276                                 break;
277                         }
278                 }
279         }
280
281         /* 2. Unlink it */
282
283         *ifap = ifa1->ifa_next;
284
285         /* 3. Announce address deletion */
286
287         /* Send message first, then call notifier.
288            At first sight, FIB update triggered by notifier
289            will refer to already deleted ifaddr, that could confuse
290            netlink listeners. It is not true: look, gated sees
291            that route deleted and if it still thinks that ifaddr
292            is valid, it will try to restore deleted routes... Grr.
293            So that, this order is correct.
294          */
295         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
297
298         if (promote) {
299
                /* prev_prom is only set when other entries sat between ifa1
                 * and the candidate; in that case move the candidate so it
                 * directly follows last_prim. */
300                 if (prev_prom) {
301                         prev_prom->ifa_next = promote->ifa_next;
302                         promote->ifa_next = last_prim->ifa_next;
303                         last_prim->ifa_next = promote;
304                 }
305
306                 promote->ifa_flags &= ~IFA_F_SECONDARY;
307                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308                 blocking_notifier_call_chain(&inetaddr_chain,
309                                 NETDEV_UP, promote);
                /* Call fib_add_ifaddr() for each address behind the
                 * promoted one that shares ifa1's mask and subnet. */
310                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311                         if (ifa1->ifa_mask != ifa->ifa_mask ||
312                             !inet_ifa_match(ifa1->ifa_address, ifa))
313                                         continue;
314                         fib_add_ifaddr(ifa);
315                 }
316
317         }
318         if (destroy)
319                 inet_free_ifa(ifa1);
320 }
321
/*
 * Delete the address *@ifap from @in_dev on behalf of a caller that has no
 * netlink request: __inet_del_ifa() is invoked with a NULL nlmsghdr and
 * pid 0.  @destroy is passed through unchanged.
 */
322 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
323                          int destroy)
324 {
325         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
326 }
327
/*
 * Link @ifa into the address list of ifa->ifa_dev.  RTNL must be held.
 *
 * @ifa:      address to insert; it is freed here on every path that does
 *            not link it into the list
 * @nlh/@pid: forwarded unchanged to rtmsg_ifa()
 *
 * An address whose ifa_local is zero is freed and 0 is returned.  If an
 * address with the same mask and subnet is already on the list, @ifa
 * becomes a secondary, unless the local address is identical (-EEXIST) or
 * the scopes differ (-EINVAL).  Primaries are linked at the position
 * tracked in last_primary, secondaries at the tail of the list.  Insertion
 * is announced with RTM_NEWADDR followed by a NETDEV_UP notifier call.
 *
 * Returns 0, -EEXIST or -EINVAL.
 */
328 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329                              u32 pid)
330 {
331         struct in_device *in_dev = ifa->ifa_dev;
332         struct in_ifaddr *ifa1, **ifap, **last_primary;
333
334         ASSERT_RTNL();
335
336         if (!ifa->ifa_local) {
337                 inet_free_ifa(ifa);
338                 return 0;
339         }
340
        /* The secondary flag is recomputed from scratch by the scan below. */
341         ifa->ifa_flags &= ~IFA_F_SECONDARY;
342         last_primary = &in_dev->ifa_list;
343
344         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345              ifap = &ifa1->ifa_next) {
                /* Remember the slot behind the last primary whose scope is
                 * not narrower than the new address's scope. */
346                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347                     ifa->ifa_scope <= ifa1->ifa_scope)
348                         last_primary = &ifa1->ifa_next;
349                 if (ifa1->ifa_mask == ifa->ifa_mask &&
350                     inet_ifa_match(ifa1->ifa_address, ifa)) {
351                         if (ifa1->ifa_local == ifa->ifa_local) {
352                                 inet_free_ifa(ifa);
353                                 return -EEXIST;
354                         }
355                         if (ifa1->ifa_scope != ifa->ifa_scope) {
356                                 inet_free_ifa(ifa);
357                                 return -EINVAL;
358                         }
359                         ifa->ifa_flags |= IFA_F_SECONDARY;
360                 }
361         }
362
        /* After the loop ifap addresses the tail link; a primary instead
         * goes to the slot recorded in last_primary. */
363         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364                 net_srandom(ifa->ifa_local);
365                 ifap = last_primary;
366         }
367
368         ifa->ifa_next = *ifap;
369         *ifap = ifa;
370
371         /* Send message first, then call notifier.
372            Notifier will trigger FIB update, so that
373            listeners of netlink will know about new ifaddr */
374         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
376
377         return 0;
378 }
379
/*
 * Insert @ifa on behalf of a caller that has no netlink request:
 * __inet_insert_ifa() is invoked with a NULL nlmsghdr and pid 0, and its
 * return value is passed back unchanged.
 */
380 static int inet_insert_ifa(struct in_ifaddr *ifa)
381 {
382         return __inet_insert_ifa(ifa, NULL, 0);
383 }
384
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386 {
387         struct in_device *in_dev = __in_dev_get_rtnl(dev);
388
389         ASSERT_RTNL();
390
391         if (!in_dev) {
392                 inet_free_ifa(ifa);
393                 return -ENOBUFS;
394         }
395         ipv4_devconf_setall(in_dev);
396         if (ifa->ifa_dev != in_dev) {
397                 WARN_ON(ifa->ifa_dev);
398                 in_dev_hold(in_dev);
399                 ifa->ifa_dev = in_dev;
400         }
401         if (ipv4_is_loopback(ifa->ifa_local))
402                 ifa->ifa_scope = RT_SCOPE_HOST;
403         return inet_insert_ifa(ifa);
404 }
405
406 struct in_device *inetdev_by_index(struct net *net, int ifindex)
407 {
408         struct net_device *dev;
409         struct in_device *in_dev = NULL;
410
411         rcu_read_lock();
412         dev = dev_get_by_index_rcu(net, ifindex);
413         if (dev)
414                 in_dev = in_dev_get(dev);
415         rcu_read_unlock();
416         return in_dev;
417 }
418 EXPORT_SYMBOL(inetdev_by_index);
419
420 /* Called only from RTNL semaphored context. No locks. */
421
422 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
423                                     __be32 mask)
424 {
425         ASSERT_RTNL();
426
427         for_primary_ifa(in_dev) {
428                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
429                         return ifa;
430         } endfor_ifa(in_dev);
431         return NULL;
432 }
433
434 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
435 {
436         struct net *net = sock_net(skb->sk);
437         struct nlattr *tb[IFA_MAX+1];
438         struct in_device *in_dev;
439         struct ifaddrmsg *ifm;
440         struct in_ifaddr *ifa, **ifap;
441         int err = -EINVAL;
442
443         ASSERT_RTNL();
444
445         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
446         if (err < 0)
447                 goto errout;
448
449         ifm = nlmsg_data(nlh);
450         in_dev = inetdev_by_index(net, ifm->ifa_index);
451         if (in_dev == NULL) {
452                 err = -ENODEV;
453                 goto errout;
454         }
455
456         __in_dev_put(in_dev);
457
458         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
459              ifap = &ifa->ifa_next) {
460                 if (tb[IFA_LOCAL] &&
461                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
462                         continue;
463
464                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
465                         continue;
466
467                 if (tb[IFA_ADDRESS] &&
468                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
469                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
470                         continue;
471
472                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
473                 return 0;
474         }
475
476         err = -EADDRNOTAVAIL;
477 errout:
478         return err;
479 }
480
481 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
482 {
483         struct nlattr *tb[IFA_MAX+1];
484         struct in_ifaddr *ifa;
485         struct ifaddrmsg *ifm;
486         struct net_device *dev;
487         struct in_device *in_dev;
488         int err;
489
490         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
491         if (err < 0)
492                 goto errout;
493
494         ifm = nlmsg_data(nlh);
495         err = -EINVAL;
496         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
497                 goto errout;
498
499         dev = __dev_get_by_index(net, ifm->ifa_index);
500         err = -ENODEV;
501         if (dev == NULL)
502                 goto errout;
503
504         in_dev = __in_dev_get_rtnl(dev);
505         err = -ENOBUFS;
506         if (in_dev == NULL)
507                 goto errout;
508
509         ifa = inet_alloc_ifa();
510         if (ifa == NULL)
511                 /*
512                  * A potential indev allocation can be left alive, it stays
513                  * assigned to its device and is destroy with it.
514                  */
515                 goto errout;
516
517         ipv4_devconf_setall(in_dev);
518         in_dev_hold(in_dev);
519
520         if (tb[IFA_ADDRESS] == NULL)
521                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
522
523         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
524         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
525         ifa->ifa_flags = ifm->ifa_flags;
526         ifa->ifa_scope = ifm->ifa_scope;
527         ifa->ifa_dev = in_dev;
528
529         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
530         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
531
532         if (tb[IFA_BROADCAST])
533                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
534
535         if (tb[IFA_LABEL])
536                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
537         else
538                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
539
540         return ifa;
541
542 errout:
543         return ERR_PTR(err);
544 }
545
546 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
547 {
548         struct net *net = sock_net(skb->sk);
549         struct in_ifaddr *ifa;
550
551         ASSERT_RTNL();
552
553         ifa = rtm_to_ifaddr(net, nlh);
554         if (IS_ERR(ifa))
555                 return PTR_ERR(ifa);
556
557         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
558 }
559
560 /*
561  *      Determine a default network mask, based on the IP address.
562  */
563
564 static inline int inet_abc_len(__be32 addr)
565 {
566         int rc = -1;    /* Something else, probably a multicast. */
567
568         if (ipv4_is_zeronet(addr))
569                 rc = 0;
570         else {
571                 __u32 haddr = ntohl(addr);
572
573                 if (IN_CLASSA(haddr))
574                         rc = 8;
575                 else if (IN_CLASSB(haddr))
576                         rc = 16;
577                 else if (IN_CLASSC(haddr))
578                         rc = 24;
579         }
580
581         return rc;
582 }
583
584
585 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
586 {
587         struct ifreq ifr;
588         struct sockaddr_in sin_orig;
589         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
590         struct in_device *in_dev;
591         struct in_ifaddr **ifap = NULL;
592         struct in_ifaddr *ifa = NULL;
593         struct net_device *dev;
594         char *colon;
595         int ret = -EFAULT;
596         int tryaddrmatch = 0;
597
598         /*
599          *      Fetch the caller's info block into kernel space
600          */
601
602         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
603                 goto out;
604         ifr.ifr_name[IFNAMSIZ - 1] = 0;
605
606         /* save original address for comparison */
607         memcpy(&sin_orig, sin, sizeof(*sin));
608
609         colon = strchr(ifr.ifr_name, ':');
610         if (colon)
611                 *colon = 0;
612
613         dev_load(net, ifr.ifr_name);
614
615         switch (cmd) {
616         case SIOCGIFADDR:       /* Get interface address */
617         case SIOCGIFBRDADDR:    /* Get the broadcast address */
618         case SIOCGIFDSTADDR:    /* Get the destination address */
619         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
620                 /* Note that these ioctls will not sleep,
621                    so that we do not impose a lock.
622                    One day we will be forced to put shlock here (I mean SMP)
623                  */
624                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
625                 memset(sin, 0, sizeof(*sin));
626                 sin->sin_family = AF_INET;
627                 break;
628
629         case SIOCSIFFLAGS:
630                 ret = -EACCES;
631                 if (!capable(CAP_NET_ADMIN))
632                         goto out;
633                 break;
634         case SIOCSIFADDR:       /* Set interface address (and family) */
635         case SIOCSIFBRDADDR:    /* Set the broadcast address */
636         case SIOCSIFDSTADDR:    /* Set the destination address */
637         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
638                 ret = -EACCES;
639                 if (!capable(CAP_NET_ADMIN))
640                         goto out;
641                 ret = -EINVAL;
642                 if (sin->sin_family != AF_INET)
643                         goto out;
644                 break;
645         default:
646                 ret = -EINVAL;
647                 goto out;
648         }
649
650         rtnl_lock();
651
652         ret = -ENODEV;
653         dev = __dev_get_by_name(net, ifr.ifr_name);
654         if (!dev)
655                 goto done;
656
657         if (colon)
658                 *colon = ':';
659
660         in_dev = __in_dev_get_rtnl(dev);
661         if (in_dev) {
662                 if (tryaddrmatch) {
663                         /* Matthias Andree */
664                         /* compare label and address (4.4BSD style) */
665                         /* note: we only do this for a limited set of ioctls
666                            and only if the original address family was AF_INET.
667                            This is checked above. */
668                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
669                              ifap = &ifa->ifa_next) {
670                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
671                                     sin_orig.sin_addr.s_addr ==
672                                                         ifa->ifa_address) {
673                                         break; /* found */
674                                 }
675                         }
676                 }
677                 /* we didn't get a match, maybe the application is
678                    4.3BSD-style and passed in junk so we fall back to
679                    comparing just the label */
680                 if (!ifa) {
681                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
682                              ifap = &ifa->ifa_next)
683                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
684                                         break;
685                 }
686         }
687
688         ret = -EADDRNOTAVAIL;
689         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
690                 goto done;
691
692         switch (cmd) {
693         case SIOCGIFADDR:       /* Get interface address */
694                 sin->sin_addr.s_addr = ifa->ifa_local;
695                 goto rarok;
696
697         case SIOCGIFBRDADDR:    /* Get the broadcast address */
698                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
699                 goto rarok;
700
701         case SIOCGIFDSTADDR:    /* Get the destination address */
702                 sin->sin_addr.s_addr = ifa->ifa_address;
703                 goto rarok;
704
705         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
706                 sin->sin_addr.s_addr = ifa->ifa_mask;
707                 goto rarok;
708
709         case SIOCSIFFLAGS:
710                 if (colon) {
711                         ret = -EADDRNOTAVAIL;
712                         if (!ifa)
713                                 break;
714                         ret = 0;
715                         if (!(ifr.ifr_flags & IFF_UP))
716                                 inet_del_ifa(in_dev, ifap, 1);
717                         break;
718                 }
719                 ret = dev_change_flags(dev, ifr.ifr_flags);
720                 break;
721
722         case SIOCSIFADDR:       /* Set interface address (and family) */
723                 ret = -EINVAL;
724                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
725                         break;
726
727                 if (!ifa) {
728                         ret = -ENOBUFS;
729                         ifa = inet_alloc_ifa();
730                         if (!ifa)
731                                 break;
732                         if (colon)
733                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
734                         else
735                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
736                 } else {
737                         ret = 0;
738                         if (ifa->ifa_local == sin->sin_addr.s_addr)
739                                 break;
740                         inet_del_ifa(in_dev, ifap, 0);
741                         ifa->ifa_broadcast = 0;
742                         ifa->ifa_scope = 0;
743                 }
744
745                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
746
747                 if (!(dev->flags & IFF_POINTOPOINT)) {
748                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
749                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
750                         if ((dev->flags & IFF_BROADCAST) &&
751                             ifa->ifa_prefixlen < 31)
752                                 ifa->ifa_broadcast = ifa->ifa_address |
753                                                      ~ifa->ifa_mask;
754                 } else {
755                         ifa->ifa_prefixlen = 32;
756                         ifa->ifa_mask = inet_make_mask(32);
757                 }
758                 ret = inet_set_ifa(dev, ifa);
759                 break;
760
761         case SIOCSIFBRDADDR:    /* Set the broadcast address */
762                 ret = 0;
763                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
764                         inet_del_ifa(in_dev, ifap, 0);
765                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
766                         inet_insert_ifa(ifa);
767                 }
768                 break;
769
770         case SIOCSIFDSTADDR:    /* Set the destination address */
771                 ret = 0;
772                 if (ifa->ifa_address == sin->sin_addr.s_addr)
773                         break;
774                 ret = -EINVAL;
775                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
776                         break;
777                 ret = 0;
778                 inet_del_ifa(in_dev, ifap, 0);
779                 ifa->ifa_address = sin->sin_addr.s_addr;
780                 inet_insert_ifa(ifa);
781                 break;
782
783         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
784
785                 /*
786                  *      The mask we set must be legal.
787                  */
788                 ret = -EINVAL;
789                 if (bad_mask(sin->sin_addr.s_addr, 0))
790                         break;
791                 ret = 0;
792                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
793                         __be32 old_mask = ifa->ifa_mask;
794                         inet_del_ifa(in_dev, ifap, 0);
795                         ifa->ifa_mask = sin->sin_addr.s_addr;
796                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
797
798                         /* See if current broadcast address matches
799                          * with current netmask, then recalculate
800                          * the broadcast address. Otherwise it's a
801                          * funny address, so don't touch it since
802                          * the user seems to know what (s)he's doing...
803                          */
804                         if ((dev->flags & IFF_BROADCAST) &&
805                             (ifa->ifa_prefixlen < 31) &&
806                             (ifa->ifa_broadcast ==
807                              (ifa->ifa_local|~old_mask))) {
808                                 ifa->ifa_broadcast = (ifa->ifa_local |
809                                                       ~sin->sin_addr.s_addr);
810                         }
811                         inet_insert_ifa(ifa);
812                 }
813                 break;
814         }
815 done:
816         rtnl_unlock();
817 out:
818         return ret;
819 rarok:
820         rtnl_unlock();
821         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
822         goto out;
823 }
824
825 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
826 {
827         struct in_device *in_dev = __in_dev_get_rtnl(dev);
828         struct in_ifaddr *ifa;
829         struct ifreq ifr;
830         int done = 0;
831
832         if (!in_dev)
833                 goto out;
834
835         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
836                 if (!buf) {
837                         done += sizeof(ifr);
838                         continue;
839                 }
840                 if (len < (int) sizeof(ifr))
841                         break;
842                 memset(&ifr, 0, sizeof(struct ifreq));
843                 if (ifa->ifa_label)
844                         strcpy(ifr.ifr_name, ifa->ifa_label);
845                 else
846                         strcpy(ifr.ifr_name, dev->name);
847
848                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
850                                                                 ifa->ifa_local;
851
852                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
853                         done = -EFAULT;
854                         break;
855                 }
856                 buf  += sizeof(struct ifreq);
857                 len  -= sizeof(struct ifreq);
858                 done += sizeof(struct ifreq);
859         }
860 out:
861         return done;
862 }
863
864 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
865 {
866         __be32 addr = 0;
867         struct in_device *in_dev;
868         struct net *net = dev_net(dev);
869
870         rcu_read_lock();
871         in_dev = __in_dev_get_rcu(dev);
872         if (!in_dev)
873                 goto no_in_dev;
874
875         for_primary_ifa(in_dev) {
876                 if (ifa->ifa_scope > scope)
877                         continue;
878                 if (!dst || inet_ifa_match(dst, ifa)) {
879                         addr = ifa->ifa_local;
880                         break;
881                 }
882                 if (!addr)
883                         addr = ifa->ifa_local;
884         } endfor_ifa(in_dev);
885
886         if (addr)
887                 goto out_unlock;
888 no_in_dev:
889
890         /* Not loopback addresses on loopback should be preferred
891            in this case. It is importnat that lo is the first interface
892            in dev_base list.
893          */
894         for_each_netdev_rcu(net, dev) {
895                 in_dev = __in_dev_get_rcu(dev);
896                 if (!in_dev)
897                         continue;
898
899                 for_primary_ifa(in_dev) {
900                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
901                             ifa->ifa_scope <= scope) {
902                                 addr = ifa->ifa_local;
903                                 goto out_unlock;
904                         }
905                 } endfor_ifa(in_dev);
906         }
907 out_unlock:
908         rcu_read_unlock();
909         return addr;
910 }
911 EXPORT_SYMBOL(inet_select_addr);
912
913 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
914                               __be32 local, int scope)
915 {
916         int same = 0;
917         __be32 addr = 0;
918
919         for_ifa(in_dev) {
920                 if (!addr &&
921                     (local == ifa->ifa_local || !local) &&
922                     ifa->ifa_scope <= scope) {
923                         addr = ifa->ifa_local;
924                         if (same)
925                                 break;
926                 }
927                 if (!same) {
928                         same = (!local || inet_ifa_match(local, ifa)) &&
929                                 (!dst || inet_ifa_match(dst, ifa));
930                         if (same && addr) {
931                                 if (local || !dst)
932                                         break;
933                                 /* Is the selected addr into dst subnet? */
934                                 if (inet_ifa_match(addr, ifa))
935                                         break;
936                                 /* No, then can we use new local src? */
937                                 if (ifa->ifa_scope <= scope) {
938                                         addr = ifa->ifa_local;
939                                         break;
940                                 }
941                                 /* search for large dst subnet for addr */
942                                 same = 0;
943                         }
944                 }
945         } endfor_ifa(in_dev);
946
947         return same ? addr : 0;
948 }
949
950 /*
951  * Confirm that local IP address exists using wildcards:
952  * - in_dev: only on this interface, 0=any interface
953  * - dst: only in the same subnet as dst, 0=any dst
954  * - local: address, 0=autoselect the local address
955  * - scope: maximum allowed scope value for the local address
956  */
957 __be32 inet_confirm_addr(struct in_device *in_dev,
958                          __be32 dst, __be32 local, int scope)
959 {
960         __be32 addr = 0;
961         struct net_device *dev;
962         struct net *net;
963
964         if (scope != RT_SCOPE_LINK)
965                 return confirm_addr_indev(in_dev, dst, local, scope);
966
967         net = dev_net(in_dev->dev);
968         rcu_read_lock();
969         for_each_netdev_rcu(net, dev) {
970                 in_dev = __in_dev_get_rcu(dev);
971                 if (in_dev) {
972                         addr = confirm_addr_indev(in_dev, dst, local, scope);
973                         if (addr)
974                                 break;
975                 }
976         }
977         rcu_read_unlock();
978
979         return addr;
980 }
981
982 /*
983  *      Device notifier
984  */
985
986 int register_inetaddr_notifier(struct notifier_block *nb)
987 {
988         return blocking_notifier_chain_register(&inetaddr_chain, nb);
989 }
990 EXPORT_SYMBOL(register_inetaddr_notifier);
991
992 int unregister_inetaddr_notifier(struct notifier_block *nb)
993 {
994         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
995 }
996 EXPORT_SYMBOL(unregister_inetaddr_notifier);
997
998 /* Rename ifa_labels for a device name change. Make some effort to preserve
999  * existing alias numbering and to create unique labels if possible.
1000 */
1001 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1002 {
1003         struct in_ifaddr *ifa;
1004         int named = 0;
1005
1006         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1007                 char old[IFNAMSIZ], *dot;
1008
1009                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1010                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1011                 if (named++ == 0)
1012                         goto skip;
1013                 dot = strchr(old, ':');
1014                 if (dot == NULL) {
1015                         sprintf(old, ":%d", named);
1016                         dot = old;
1017                 }
1018                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1019                         strcat(ifa->ifa_label, dot);
1020                 else
1021                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1022 skip:
1023                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1024         }
1025 }
1026
/* Tell whether @mtu is large enough (at least 68) for IPv4 to stay enabled. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_mtu = 68;

	if (mtu < min_mtu)
		return false;
	return true;
}
1031
1032 /* Called only under RTNL semaphore */
1033
1034 static int inetdev_event(struct notifier_block *this, unsigned long event,
1035                          void *ptr)
1036 {
1037         struct net_device *dev = ptr;
1038         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1039
1040         ASSERT_RTNL();
1041
1042         if (!in_dev) {
1043                 if (event == NETDEV_REGISTER) {
1044                         in_dev = inetdev_init(dev);
1045                         if (!in_dev)
1046                                 return notifier_from_errno(-ENOMEM);
1047                         if (dev->flags & IFF_LOOPBACK) {
1048                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1049                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1050                         }
1051                 } else if (event == NETDEV_CHANGEMTU) {
1052                         /* Re-enabling IP */
1053                         if (inetdev_valid_mtu(dev->mtu))
1054                                 in_dev = inetdev_init(dev);
1055                 }
1056                 goto out;
1057         }
1058
1059         switch (event) {
1060         case NETDEV_REGISTER:
1061                 printk(KERN_DEBUG "inetdev_event: bug\n");
1062                 dev->ip_ptr = NULL;
1063                 break;
1064         case NETDEV_UP:
1065                 if (!inetdev_valid_mtu(dev->mtu))
1066                         break;
1067                 if (dev->flags & IFF_LOOPBACK) {
1068                         struct in_ifaddr *ifa = inet_alloc_ifa();
1069
1070                         if (ifa) {
1071                                 ifa->ifa_local =
1072                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1073                                 ifa->ifa_prefixlen = 8;
1074                                 ifa->ifa_mask = inet_make_mask(8);
1075                                 in_dev_hold(in_dev);
1076                                 ifa->ifa_dev = in_dev;
1077                                 ifa->ifa_scope = RT_SCOPE_HOST;
1078                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1079                                 inet_insert_ifa(ifa);
1080                         }
1081                 }
1082                 ip_mc_up(in_dev);
1083                 /* fall through */
1084         case NETDEV_NOTIFY_PEERS:
1085         case NETDEV_CHANGEADDR:
1086                 /* Send gratuitous ARP to notify of link change */
1087                 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1088                         struct in_ifaddr *ifa = in_dev->ifa_list;
1089
1090                         if (ifa)
1091                                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1092                                          ifa->ifa_address, dev,
1093                                          ifa->ifa_address, NULL,
1094                                          dev->dev_addr, NULL);
1095                 }
1096                 break;
1097         case NETDEV_DOWN:
1098                 ip_mc_down(in_dev);
1099                 break;
1100         case NETDEV_PRE_TYPE_CHANGE:
1101                 ip_mc_unmap(in_dev);
1102                 break;
1103         case NETDEV_POST_TYPE_CHANGE:
1104                 ip_mc_remap(in_dev);
1105                 break;
1106         case NETDEV_CHANGEMTU:
1107                 if (inetdev_valid_mtu(dev->mtu))
1108                         break;
1109                 /* disable IP when MTU is not enough */
1110         case NETDEV_UNREGISTER:
1111                 inetdev_destroy(in_dev);
1112                 break;
1113         case NETDEV_CHANGENAME:
1114                 /* Do not notify about label change, this event is
1115                  * not interesting to applications using netlink.
1116                  */
1117                 inetdev_changename(dev, in_dev);
1118
1119                 devinet_sysctl_unregister(in_dev);
1120                 devinet_sysctl_register(in_dev);
1121                 break;
1122         }
1123 out:
1124         return NOTIFY_DONE;
1125 }
1126
/*
 * Notifier block that routes netdevice events to inetdev_event(); it is
 * handed to register_netdevice_notifier() in devinet_init().
 */
1127 static struct notifier_block ip_netdev_notifier = {
1128         .notifier_call = inetdev_event,
1129 };
1130
1131 static inline size_t inet_nlmsg_size(void)
1132 {
1133         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1134                + nla_total_size(4) /* IFA_ADDRESS */
1135                + nla_total_size(4) /* IFA_LOCAL */
1136                + nla_total_size(4) /* IFA_BROADCAST */
1137                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1138 }
1139
1140 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1141                             u32 pid, u32 seq, int event, unsigned int flags)
1142 {
1143         struct ifaddrmsg *ifm;
1144         struct nlmsghdr  *nlh;
1145
1146         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1147         if (nlh == NULL)
1148                 return -EMSGSIZE;
1149
1150         ifm = nlmsg_data(nlh);
1151         ifm->ifa_family = AF_INET;
1152         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1153         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1154         ifm->ifa_scope = ifa->ifa_scope;
1155         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1156
1157         if (ifa->ifa_address)
1158                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1159
1160         if (ifa->ifa_local)
1161                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1162
1163         if (ifa->ifa_broadcast)
1164                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1165
1166         if (ifa->ifa_label[0])
1167                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1168
1169         return nlmsg_end(skb, nlh);
1170
1171 nla_put_failure:
1172         nlmsg_cancel(skb, nlh);
1173         return -EMSGSIZE;
1174 }
1175
1176 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1177 {
1178         struct net *net = sock_net(skb->sk);
1179         int h, s_h;
1180         int idx, s_idx;
1181         int ip_idx, s_ip_idx;
1182         struct net_device *dev;
1183         struct in_device *in_dev;
1184         struct in_ifaddr *ifa;
1185         struct hlist_head *head;
1186         struct hlist_node *node;
1187
1188         s_h = cb->args[0];
1189         s_idx = idx = cb->args[1];
1190         s_ip_idx = ip_idx = cb->args[2];
1191
1192         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1193                 idx = 0;
1194                 head = &net->dev_index_head[h];
1195                 rcu_read_lock();
1196                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1197                         if (idx < s_idx)
1198                                 goto cont;
1199                         if (h > s_h || idx > s_idx)
1200                                 s_ip_idx = 0;
1201                         in_dev = __in_dev_get_rcu(dev);
1202                         if (!in_dev)
1203                                 goto cont;
1204
1205                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1206                              ifa = ifa->ifa_next, ip_idx++) {
1207                                 if (ip_idx < s_ip_idx)
1208                                         continue;
1209                                 if (inet_fill_ifaddr(skb, ifa,
1210                                              NETLINK_CB(cb->skb).pid,
1211                                              cb->nlh->nlmsg_seq,
1212                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1213                                         rcu_read_unlock();
1214                                         goto done;
1215                                 }
1216                         }
1217 cont:
1218                         idx++;
1219                 }
1220                 rcu_read_unlock();
1221         }
1222
1223 done:
1224         cb->args[0] = h;
1225         cb->args[1] = idx;
1226         cb->args[2] = ip_idx;
1227
1228         return skb->len;
1229 }
1230
1231 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1232                       u32 pid)
1233 {
1234         struct sk_buff *skb;
1235         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1236         int err = -ENOBUFS;
1237         struct net *net;
1238
1239         net = dev_net(ifa->ifa_dev->dev);
1240         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1241         if (skb == NULL)
1242                 goto errout;
1243
1244         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1245         if (err < 0) {
1246                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1247                 WARN_ON(err == -EMSGSIZE);
1248                 kfree_skb(skb);
1249                 goto errout;
1250         }
1251         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1252         return;
1253 errout:
1254         if (err < 0)
1255                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1256 }
1257
1258 #ifdef CONFIG_SYSCTL
1259
1260 static void devinet_copy_dflt_conf(struct net *net, int i)
1261 {
1262         struct net_device *dev;
1263
1264         rcu_read_lock();
1265         for_each_netdev_rcu(net, dev) {
1266                 struct in_device *in_dev;
1267
1268                 in_dev = __in_dev_get_rcu(dev);
1269                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1270                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1271         }
1272         rcu_read_unlock();
1273 }
1274
1275 /* called with RTNL locked */
1276 static void inet_forward_change(struct net *net)
1277 {
1278         struct net_device *dev;
1279         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1280
1281         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1282         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1283
1284         for_each_netdev(net, dev) {
1285                 struct in_device *in_dev;
1286                 if (on)
1287                         dev_disable_lro(dev);
1288                 rcu_read_lock();
1289                 in_dev = __in_dev_get_rcu(dev);
1290                 if (in_dev)
1291                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1292                 rcu_read_unlock();
1293         }
1294 }
1295
1296 static int devinet_conf_proc(ctl_table *ctl, int write,
1297                              void __user *buffer,
1298                              size_t *lenp, loff_t *ppos)
1299 {
1300         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1301
1302         if (write) {
1303                 struct ipv4_devconf *cnf = ctl->extra1;
1304                 struct net *net = ctl->extra2;
1305                 int i = (int *)ctl->data - cnf->data;
1306
1307                 set_bit(i, cnf->state);
1308
1309                 if (cnf == net->ipv4.devconf_dflt)
1310                         devinet_copy_dflt_conf(net, i);
1311         }
1312
1313         return ret;
1314 }
1315
1316 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1317                                   void __user *buffer,
1318                                   size_t *lenp, loff_t *ppos)
1319 {
1320         int *valp = ctl->data;
1321         int val = *valp;
1322         loff_t pos = *ppos;
1323         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1324
1325         if (write && *valp != val) {
1326                 struct net *net = ctl->extra2;
1327
1328                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1329                         if (!rtnl_trylock()) {
1330                                 /* Restore the original values before restarting */
1331                                 *valp = val;
1332                                 *ppos = pos;
1333                                 return restart_syscall();
1334                         }
1335                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1336                                 inet_forward_change(net);
1337                         } else if (*valp) {
1338                                 struct ipv4_devconf *cnf = ctl->extra1;
1339                                 struct in_device *idev =
1340                                         container_of(cnf, struct in_device, cnf);
1341                                 dev_disable_lro(idev->dev);
1342                         }
1343                         rtnl_unlock();
1344                         rt_cache_flush(net, 0);
1345                 }
1346         }
1347
1348         return ret;
1349 }
1350
1351 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1352                          void __user *buffer,
1353                          size_t *lenp, loff_t *ppos)
1354 {
1355         int *valp = ctl->data;
1356         int val = *valp;
1357         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1358         struct net *net = ctl->extra2;
1359
1360         if (write && *valp != val)
1361                 rt_cache_flush(net, 0);
1362
1363         return ret;
1364 }
1365
1366 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1367         { \
1368                 .procname       = name, \
1369                 .data           = ipv4_devconf.data + \
1370                                   IPV4_DEVCONF_ ## attr - 1, \
1371                 .maxlen         = sizeof(int), \
1372                 .mode           = mval, \
1373                 .proc_handler   = proc, \
1374                 .extra1         = &ipv4_devconf, \
1375         }
1376
1377 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1378         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1379
1380 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1381         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1382
1383 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1384         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1385
1386 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1387         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1388
1389 static struct devinet_sysctl_table {
1390         struct ctl_table_header *sysctl_header;
1391         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1392         char *dev_name;
1393 } devinet_sysctl = {
1394         .devinet_vars = {
1395                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1396                                              devinet_sysctl_forward),
1397                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1398
1399                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1400                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1401                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1402                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1403                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1404                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1405                                         "accept_source_route"),
1406                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1407                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1408                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1409                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1410                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1411                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1412                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1413                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1414                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1415                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1416                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1417                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1418                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1419
1420                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1421                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1422                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1423                                               "force_igmp_version"),
1424                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1425                                               "promote_secondaries"),
1426         },
1427 };
1428
1429 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1430                                         struct ipv4_devconf *p)
1431 {
1432         int i;
1433         struct devinet_sysctl_table *t;
1434
1435 #define DEVINET_CTL_PATH_DEV    3
1436
1437         struct ctl_path devinet_ctl_path[] = {
1438                 { .procname = "net",  },
1439                 { .procname = "ipv4", },
1440                 { .procname = "conf", },
1441                 { /* to be set */ },
1442                 { },
1443         };
1444
1445         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1446         if (!t)
1447                 goto out;
1448
1449         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1450                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1451                 t->devinet_vars[i].extra1 = p;
1452                 t->devinet_vars[i].extra2 = net;
1453         }
1454
1455         /*
1456          * Make a copy of dev_name, because '.procname' is regarded as const
1457          * by sysctl and we wouldn't want anyone to change it under our feet
1458          * (see SIOCSIFNAME).
1459          */
1460         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1461         if (!t->dev_name)
1462                 goto free;
1463
1464         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1465
1466         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1467                         t->devinet_vars);
1468         if (!t->sysctl_header)
1469                 goto free_procname;
1470
1471         p->sysctl = t;
1472         return 0;
1473
1474 free_procname:
1475         kfree(t->dev_name);
1476 free:
1477         kfree(t);
1478 out:
1479         return -ENOBUFS;
1480 }
1481
1482 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1483 {
1484         struct devinet_sysctl_table *t = cnf->sysctl;
1485
1486         if (t == NULL)
1487                 return;
1488
1489         cnf->sysctl = NULL;
1490         unregister_sysctl_table(t->sysctl_header);
1491         kfree(t->dev_name);
1492         kfree(t);
1493 }
1494
1495 static void devinet_sysctl_register(struct in_device *idev)
1496 {
1497         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1498         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1499                                         &idev->cnf);
1500 }
1501
1502 static void devinet_sysctl_unregister(struct in_device *idev)
1503 {
1504         __devinet_sysctl_unregister(&idev->cnf);
1505         neigh_sysctl_unregister(idev->arp_parms);
1506 }
1507
1508 static struct ctl_table ctl_forward_entry[] = {
1509         {
1510                 .procname       = "ip_forward",
1511                 .data           = &ipv4_devconf.data[
1512                                         IPV4_DEVCONF_FORWARDING - 1],
1513                 .maxlen         = sizeof(int),
1514                 .mode           = 0644,
1515                 .proc_handler   = devinet_sysctl_forward,
1516                 .extra1         = &ipv4_devconf,
1517                 .extra2         = &init_net,
1518         },
1519         { },
1520 };
1521
/*
 * sysctl path "net/ipv4", passed to register_net_sysctl_table() in
 * devinet_init_net() when registering the ip_forward table.
 */
1522 static __net_initdata struct ctl_path net_ipv4_path[] = {
1523         { .procname = "net", },
1524         { .procname = "ipv4", },
1525         { },
1526 };
1527 #endif
1528
1529 static __net_init int devinet_init_net(struct net *net)
1530 {
1531         int err;
1532         struct ipv4_devconf *all, *dflt;
1533 #ifdef CONFIG_SYSCTL
1534         struct ctl_table *tbl = ctl_forward_entry;
1535         struct ctl_table_header *forw_hdr;
1536 #endif
1537
1538         err = -ENOMEM;
1539         all = &ipv4_devconf;
1540         dflt = &ipv4_devconf_dflt;
1541
1542         if (!net_eq(net, &init_net)) {
1543                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1544                 if (all == NULL)
1545                         goto err_alloc_all;
1546
1547                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1548                 if (dflt == NULL)
1549                         goto err_alloc_dflt;
1550
1551 #ifdef CONFIG_SYSCTL
1552                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1553                 if (tbl == NULL)
1554                         goto err_alloc_ctl;
1555
1556                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1557                 tbl[0].extra1 = all;
1558                 tbl[0].extra2 = net;
1559 #endif
1560         }
1561
1562 #ifdef CONFIG_SYSCTL
1563         err = __devinet_sysctl_register(net, "all", all);
1564         if (err < 0)
1565                 goto err_reg_all;
1566
1567         err = __devinet_sysctl_register(net, "default", dflt);
1568         if (err < 0)
1569                 goto err_reg_dflt;
1570
1571         err = -ENOMEM;
1572         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1573         if (forw_hdr == NULL)
1574                 goto err_reg_ctl;
1575         net->ipv4.forw_hdr = forw_hdr;
1576 #endif
1577
1578         net->ipv4.devconf_all = all;
1579         net->ipv4.devconf_dflt = dflt;
1580         return 0;
1581
1582 #ifdef CONFIG_SYSCTL
1583 err_reg_ctl:
1584         __devinet_sysctl_unregister(dflt);
1585 err_reg_dflt:
1586         __devinet_sysctl_unregister(all);
1587 err_reg_all:
1588         if (tbl != ctl_forward_entry)
1589                 kfree(tbl);
1590 err_alloc_ctl:
1591 #endif
1592         if (dflt != &ipv4_devconf_dflt)
1593                 kfree(dflt);
1594 err_alloc_dflt:
1595         if (all != &ipv4_devconf)
1596                 kfree(all);
1597 err_alloc_all:
1598         return err;
1599 }
1600
1601 static __net_exit void devinet_exit_net(struct net *net)
1602 {
1603 #ifdef CONFIG_SYSCTL
1604         struct ctl_table *tbl;
1605
1606         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1607         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1608         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1609         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1610         kfree(tbl);
1611 #endif
1612         kfree(net->ipv4.devconf_dflt);
1613         kfree(net->ipv4.devconf_all);
1614 }
1615
/*
 * Per-network-namespace init/exit hooks for the devconf state; handed to
 * register_pernet_subsys() in devinet_init().
 */
1616 static __net_initdata struct pernet_operations devinet_ops = {
1617         .init = devinet_init_net,
1618         .exit = devinet_exit_net,
1619 };
1620
/*
 * Boot-time initialisation of the IPv4 device layer: registers the
 * per-namespace operations, the PF_INET gifconf handler, the netdevice
 * notifier and the rtnetlink address handlers.  None of the
 * registration results are checked here.
 */
1621 void __init devinet_init(void)
1622 {
1623         register_pernet_subsys(&devinet_ops);
1624
1625         register_gifconf(PF_INET, inet_gifconf);
1626         register_netdevice_notifier(&ip_netdev_notifier);
1627
        /* RTM_NEWADDR/RTM_DELADDR get a doit handler, RTM_GETADDR a dump handler. */
1628         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1629         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1630         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1631 }
1632