2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
/*
 * Built-in ipv4_devconf instance. devinet_init_net() uses it as the "all"
 * configuration: directly for init_net, and as the kmemdup() template for
 * other namespaces. Slots are indexed by IPV4_DEVCONF_<attr> - 1; the
 * entries listed here are preset to 1.
 */
66 static struct ipv4_devconf ipv4_devconf = {
68 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
/*
 * Built-in "default" ipv4_devconf. devinet_init_net() installs it (or a
 * kmemdup() copy for namespaces other than init_net) as
 * net->ipv4.devconf_dflt, which inetdev_init() copies into each new
 * in_device. Same presets as ipv4_devconf plus ACCEPT_SOURCE_ROUTE.
 */
75 static struct ipv4_devconf ipv4_devconf_dflt = {
77 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/*
 * Accessor for attribute attr in the namespace's devconf_dflt table. It is
 * used both as an assignment target and with the address-of operator
 * further down in this file.
 */
85 #define IPV4_DEVCONF_DFLT(net, attr) \
86 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr()
 * and rtm_to_ifaddr(): the three address attributes are 32-bit values and
 * the label is a string of at most IFNAMSIZ - 1 bytes.
 */
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89 [IFA_LOCAL] = { .type = NLA_U32 },
90 [IFA_ADDRESS] = { .type = NLA_U32 },
91 [IFA_BROADCAST] = { .type = NLA_U32 },
92 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
/*
 * Forward declarations for helpers defined further down, together with the
 * blocking notifier chain on which address events (NETDEV_UP / NETDEV_DOWN)
 * are published by the insert and delete paths.
 * NOTE(review): the static inline devinet_sysctl_* variants are presumably
 * the stubs for builds without CONFIG_SYSCTL -- confirm.
 */
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
104 static inline void devinet_sysctl_register(struct in_device *idev)
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
112 /* Locks all the inet devices. */
/*
 * Allocate a zero-filled struct in_ifaddr with GFP_KERNEL. The result is
 * whatever kzalloc() returns, so callers must handle NULL.
 */
114 static struct in_ifaddr *inet_alloc_ifa(void)
116 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
/*
 * RCU callback queued by inet_free_ifa(). Recovers the in_ifaddr that
 * embeds head and drops the reference it holds on its in_device.
 */
119 static void inet_rcu_free_ifa(struct rcu_head *head)
121 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
123 in_dev_put(ifa->ifa_dev);
/*
 * Release ifa through RCU: the actual teardown is deferred to
 * inet_rcu_free_ifa() via call_rcu().
 */
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
129 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final teardown of an in_device (exported). Warns if an address list or a
 * multicast list is still attached, logs the device pointer and name when
 * NET_REFCNT_DEBUG is defined, and reports via pr_err() the case where an
 * in_device that is still alive is being freed.
 */
132 void in_dev_finish_destroy(struct in_device *idev)
134 struct net_device *dev = idev->dev;
136 WARN_ON(idev->ifa_list);
137 WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 idev, dev ? dev->name : "NIL");
144 pr_err("Freeing alive in_device %p\n", idev);
148 EXPORT_SYMBOL(in_dev_finish_destroy);
/*
 * Create and attach the IPv4 state for dev. The in_device is zero-allocated,
 * its cnf is copied from the namespace's devconf_dflt (with cnf.sysctl
 * reset to NULL), ARP neighbour parameters are allocated, and LRO is
 * disabled when FORWARDING is set in the copied configuration. sysctl and
 * multicast state are set up before dev->ip_ptr is published with
 * rcu_assign_pointer(), because reception can start as soon as that pointer
 * is visible.
 */
150 static struct in_device *inetdev_init(struct net_device *dev)
152 struct in_device *in_dev;
156 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
159 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160 sizeof(in_dev->cnf));
161 in_dev->cnf.sysctl = NULL;
163 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164 if (!in_dev->arp_parms)
166 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167 dev_disable_lro(dev);
168 /* Reference in_dev->dev */
170 /* Account for reference dev->ip_ptr (below) */
173 devinet_sysctl_register(in_dev);
174 ip_mc_init_dev(in_dev);
175 if (dev->flags & IFF_UP)
178 /* we can receive as soon as ip_ptr is set -- do this last */
179 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback queued by inetdev_destroy(); recovers the in_device that
 * embeds head.
 */
188 static void in_dev_rcu_put(struct rcu_head *head)
190 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Detach and tear down in_dev: destroy its multicast state, delete every
 * address still on ifa_list, clear dev->ip_ptr, unregister the sysctl
 * entries, release the ARP neighbour parameters and finally queue
 * in_dev_rcu_put() through call_rcu().
 */
194 static void inetdev_destroy(struct in_device *in_dev)
196 struct in_ifaddr *ifa;
197 struct net_device *dev;
205 ip_mc_destroy_dev(in_dev);
207 while ((ifa = in_dev->ifa_list) != NULL) {
208 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
212 rcu_assign_pointer(dev->ip_ptr, NULL);
214 devinet_sysctl_unregister(in_dev);
215 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
218 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Walk the primary addresses of in_dev looking for one whose subnet
 * contains a (inet_ifa_match). When b is non-zero it must fall in that same
 * subnet as well; b == 0 means only a is checked.
 */
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
224 for_primary_ifa(in_dev) {
225 if (inet_ifa_match(a, ifa)) {
226 if (!b || inet_ifa_match(b, ifa)) {
231 } endfor_ifa(in_dev);
/*
 * Unlink the address *ifap from in_dev and announce the removal.
 *
 * If the address is a primary one, the secondaries sharing its mask and
 * subnet are handled first: they are unlinked and announced with
 * RTM_DELADDR plus a notifier call, unless IN_DEV_PROMOTE_SECONDARIES is
 * set. In the promotion case one secondary is relinked after the last
 * primary, loses IFA_F_SECONDARY and is announced with RTM_NEWADDR.
 * The deleted address itself is announced with RTM_DELADDR first and the
 * NETDEV_DOWN notifier second. nlh and pid are passed through to
 * rtmsg_ifa().
 * NOTE(review): destroy presumably selects whether the ifaddr is freed --
 * confirm against the full function.
 */
236 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237 int destroy, struct nlmsghdr *nlh, u32 pid)
239 struct in_ifaddr *promote = NULL;
240 struct in_ifaddr *ifa, *ifa1 = *ifap;
241 struct in_ifaddr *last_prim = in_dev->ifa_list;
242 struct in_ifaddr *prev_prom = NULL;
243 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
247 /* 1. Deleting primary ifaddr forces deletion of all secondaries
248 * unless alias promotion is set
251 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
254 while ((ifa = *ifap1) != NULL) {
255 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256 ifa1->ifa_scope <= ifa->ifa_scope)
259 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260 ifa1->ifa_mask != ifa->ifa_mask ||
261 !inet_ifa_match(ifa1->ifa_address, ifa)) {
262 ifap1 = &ifa->ifa_next;
268 *ifap1 = ifa->ifa_next;
270 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271 blocking_notifier_call_chain(&inetaddr_chain,
283 *ifap = ifa1->ifa_next;
285 /* 3. Announce address deletion */
287 /* Send message first, then call notifier.
288 At first sight, FIB update triggered by notifier
289 will refer to already deleted ifaddr, that could confuse
290 netlink listeners. It is not true: look, gated sees
291 that route deleted and if it still thinks that ifaddr
292 is valid, it will try to restore deleted routes... Grr.
293 So that, this order is correct.
295 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
301 prev_prom->ifa_next = promote->ifa_next;
302 promote->ifa_next = last_prim->ifa_next;
303 last_prim->ifa_next = promote;
306 promote->ifa_flags &= ~IFA_F_SECONDARY;
307 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308 blocking_notifier_call_chain(&inetaddr_chain,
310 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311 if (ifa1->ifa_mask != ifa->ifa_mask ||
312 !inet_ifa_match(ifa1->ifa_address, ifa))
// Convenience wrapper around __inet_del_ifa() for callers that have no
// netlink request: passes a NULL nlmsghdr and pid 0.
322 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
325 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
// Link ifa into the address list of ifa->ifa_dev and announce it.
//
// IFA_F_SECONDARY is cleared up front. The list scan remembers the slot
// after the last primary whose scope is not below the new address's scope,
// and marks the new address secondary when an existing entry has the same
// mask and covers the same subnet (an identical local address or a scope
// mismatch get their own handling inside that branch). A new primary
// address also seeds net_srandom() with its local address. RTM_NEWADDR is
// sent before the NETDEV_UP notifier runs, so netlink listeners learn of
// the address before the FIB update the notifier triggers.
328 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
331 struct in_device *in_dev = ifa->ifa_dev;
332 struct in_ifaddr *ifa1, **ifap, **last_primary;
336 if (!ifa->ifa_local) {
341 ifa->ifa_flags &= ~IFA_F_SECONDARY;
342 last_primary = &in_dev->ifa_list;
344 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345 ifap = &ifa1->ifa_next) {
346 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347 ifa->ifa_scope <= ifa1->ifa_scope)
348 last_primary = &ifa1->ifa_next;
349 if (ifa1->ifa_mask == ifa->ifa_mask &&
350 inet_ifa_match(ifa1->ifa_address, ifa)) {
351 if (ifa1->ifa_local == ifa->ifa_local) {
355 if (ifa1->ifa_scope != ifa->ifa_scope) {
359 ifa->ifa_flags |= IFA_F_SECONDARY;
363 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364 net_srandom(ifa->ifa_local);
368 ifa->ifa_next = *ifap;
371 /* Send message first, then call notifier.
372 Notifier will trigger FIB update, so that
373 listeners of netlink will know about new ifaddr */
374 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have no
 * netlink request: passes a NULL nlmsghdr and pid 0, and returns its result.
 */
380 static int inet_insert_ifa(struct in_ifaddr *ifa)
382 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Bind ifa to the in_device of dev and insert it. ifa is expected to be
 * unbound or already bound to this in_device (WARN_ON otherwise). A
 * loopback local address is forced to RT_SCOPE_HOST. Returns the result
 * of inet_insert_ifa().
 */
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
387 struct in_device *in_dev = __in_dev_get_rtnl(dev);
395 ipv4_devconf_setall(in_dev);
396 if (ifa->ifa_dev != in_dev) {
397 WARN_ON(ifa->ifa_dev);
399 ifa->ifa_dev = in_dev;
401 if (ipv4_is_loopback(ifa->ifa_local))
402 ifa->ifa_scope = RT_SCOPE_HOST;
403 return inet_insert_ifa(ifa);
/*
 * Look up the in_device of the interface with index ifindex in net
 * (exported). The device is found with dev_get_by_index_rcu() and its
 * ip_ptr is read with rcu_dereference_rtnl(); in_dev starts out as NULL.
 */
406 /* Caller must hold RCU or RTNL :
407 * We don't take a reference on found in_device
409 struct in_device *inetdev_by_index(struct net *net, int ifindex)
411 struct net_device *dev;
412 struct in_device *in_dev = NULL;
415 dev = dev_get_by_index_rcu(net, ifindex);
417 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
421 EXPORT_SYMBOL(inetdev_by_index);
423 /* Called only from RTNL semaphored context. No locks. */
/*
 * Search the primary addresses of in_dev for one whose netmask equals mask
 * and whose subnet contains prefix.
 */
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
430 for_primary_ifa(in_dev) {
431 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433 } endfor_ifa(in_dev);
/*
 * rtnetlink RTM_DELADDR handler. The request is parsed against
 * ifa_ipv4_policy and the in_device is looked up by ifm->ifa_index. The
 * address list is then scanned with filters on the supplied attributes:
 * IFA_LOCAL must equal ifa_local, IFA_LABEL must equal the label, and
 * IFA_ADDRESS must agree on prefix length and subnet. An entry that passes
 * is removed with __inet_del_ifa(), which is given the request header and
 * the sender's pid.
 * NOTE(review): -EADDRNOTAVAIL looks like the result when no entry was
 * deleted -- confirm.
 */
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439 struct net *net = sock_net(skb->sk);
440 struct nlattr *tb[IFA_MAX+1];
441 struct in_device *in_dev;
442 struct ifaddrmsg *ifm;
443 struct in_ifaddr *ifa, **ifap;
448 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
452 ifm = nlmsg_data(nlh);
453 in_dev = inetdev_by_index(net, ifm->ifa_index);
454 if (in_dev == NULL) {
459 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460 ifap = &ifa->ifa_next) {
462 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468 if (tb[IFA_ADDRESS] &&
469 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
477 err = -EADDRNOTAVAIL;
/*
 * Build a new in_ifaddr from an RTM_NEWADDR request. The message is parsed
 * against ifa_ipv4_policy; a prefix length above 32 or a missing IFA_LOCAL
 * is checked for right after parsing. The device comes from
 * ifm->ifa_index via __dev_get_by_index(). IFA_ADDRESS falls back to
 * IFA_LOCAL when absent. Prefix length, mask, flags and scope are taken
 * from the message header; the broadcast address is taken from
 * IFA_BROADCAST when present; the label is copied either from IFA_LABEL
 * (nla_strlcpy) or from dev->name.
 */
482 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
484 struct nlattr *tb[IFA_MAX+1];
485 struct in_ifaddr *ifa;
486 struct ifaddrmsg *ifm;
487 struct net_device *dev;
488 struct in_device *in_dev;
491 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
495 ifm = nlmsg_data(nlh);
497 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500 dev = __dev_get_by_index(net, ifm->ifa_index);
505 in_dev = __in_dev_get_rtnl(dev);
510 ifa = inet_alloc_ifa();
513 * A potential indev allocation can be left alive, it stays
514 * assigned to its device and is destroy with it.
518 ipv4_devconf_setall(in_dev);
521 if (tb[IFA_ADDRESS] == NULL)
522 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
524 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526 ifa->ifa_flags = ifm->ifa_flags;
527 ifa->ifa_scope = ifm->ifa_scope;
528 ifa->ifa_dev = in_dev;
530 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
533 if (tb[IFA_BROADCAST])
534 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
539 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * rtnetlink RTM_NEWADDR handler: converts the request into an in_ifaddr
 * with rtm_to_ifaddr() and inserts it with __inet_insert_ifa(), passing the
 * request header and the sender's pid so the notification can refer to them.
 */
547 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
549 struct net *net = sock_net(skb->sk);
550 struct in_ifaddr *ifa;
554 ifa = rtm_to_ifaddr(net, nlh);
558 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
562 * Determine a default network mask, based on the IP address.
/*
 * Classful default prefix length for addr. The result starts at -1, which
 * stands for an address that is none of class A, B or C. The zero network
 * is tested separately with ipv4_is_zeronet(); otherwise the class tests
 * run on the host-order address. Callers in devinet_ioctl() treat a
 * negative result as an invalid address.
 */
565 static inline int inet_abc_len(__be32 addr)
567 int rc = -1; /* Something else, probably a multicast. */
569 if (ipv4_is_zeronet(addr))
572 __u32 haddr = ntohl(addr);
574 if (IN_CLASSA(haddr))
576 else if (IN_CLASSB(haddr))
578 else if (IN_CLASSC(haddr))
/*
 * Handle the IPv4 interface ioctls: get/set of the interface address,
 * broadcast address, destination address and netmask, plus flag changes
 * that end in dev_change_flags().
 *
 * The ifreq is copied in from arg and its name is forcibly NUL-terminated;
 * the caller's original sockaddr is saved in sin_orig. For the "get"
 * commands sin is cleared and tryaddrmatch records whether the caller
 * passed an AF_INET address; if so the address is first looked up by label
 * and address, and only then by label alone. The "set" commands require
 * CAP_NET_ADMIN and an AF_INET sockaddr. Without a matching address every
 * command except SIOCSIFADDR and SIOCSIFFLAGS is stopped with ret preset
 * to -EADDRNOTAVAIL.
 *
 * Set operations that modify an existing address take it off the list with
 * inet_del_ifa(..., 0), change the field and re-insert it. SIOCSIFADDR
 * derives the prefix length from inet_abc_len() on non point-to-point
 * devices and uses /32 otherwise. SIOCSIFNETMASK recomputes the broadcast
 * address only when the old one matched the old mask. The final visible
 * step copies ifr back to arg and yields -EFAULT if that copy fails.
 */
586 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 struct sockaddr_in sin_orig;
590 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591 struct in_device *in_dev;
592 struct in_ifaddr **ifap = NULL;
593 struct in_ifaddr *ifa = NULL;
594 struct net_device *dev;
597 int tryaddrmatch = 0;
600 * Fetch the caller's info block into kernel space
603 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
605 ifr.ifr_name[IFNAMSIZ - 1] = 0;
607 /* save original address for comparison */
608 memcpy(&sin_orig, sin, sizeof(*sin));
610 colon = strchr(ifr.ifr_name, ':');
614 dev_load(net, ifr.ifr_name);
617 case SIOCGIFADDR: /* Get interface address */
618 case SIOCGIFBRDADDR: /* Get the broadcast address */
619 case SIOCGIFDSTADDR: /* Get the destination address */
620 case SIOCGIFNETMASK: /* Get the netmask for the interface */
621 /* Note that these ioctls will not sleep,
622 so that we do not impose a lock.
623 One day we will be forced to put shlock here (I mean SMP)
625 tryaddrmatch = (sin_orig.sin_family == AF_INET);
626 memset(sin, 0, sizeof(*sin));
627 sin->sin_family = AF_INET;
632 if (!capable(CAP_NET_ADMIN))
635 case SIOCSIFADDR: /* Set interface address (and family) */
636 case SIOCSIFBRDADDR: /* Set the broadcast address */
637 case SIOCSIFDSTADDR: /* Set the destination address */
638 case SIOCSIFNETMASK: /* Set the netmask for the interface */
640 if (!capable(CAP_NET_ADMIN))
643 if (sin->sin_family != AF_INET)
654 dev = __dev_get_by_name(net, ifr.ifr_name);
661 in_dev = __in_dev_get_rtnl(dev);
664 /* Matthias Andree */
665 /* compare label and address (4.4BSD style) */
666 /* note: we only do this for a limited set of ioctls
667 and only if the original address family was AF_INET.
668 This is checked above. */
669 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670 ifap = &ifa->ifa_next) {
671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 sin_orig.sin_addr.s_addr ==
678 /* we didn't get a match, maybe the application is
679 4.3BSD-style and passed in junk so we fall back to
680 comparing just the label */
682 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683 ifap = &ifa->ifa_next)
684 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
689 ret = -EADDRNOTAVAIL;
690 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
694 case SIOCGIFADDR: /* Get interface address */
695 sin->sin_addr.s_addr = ifa->ifa_local;
698 case SIOCGIFBRDADDR: /* Get the broadcast address */
699 sin->sin_addr.s_addr = ifa->ifa_broadcast;
702 case SIOCGIFDSTADDR: /* Get the destination address */
703 sin->sin_addr.s_addr = ifa->ifa_address;
706 case SIOCGIFNETMASK: /* Get the netmask for the interface */
707 sin->sin_addr.s_addr = ifa->ifa_mask;
712 ret = -EADDRNOTAVAIL;
716 if (!(ifr.ifr_flags & IFF_UP))
717 inet_del_ifa(in_dev, ifap, 1);
720 ret = dev_change_flags(dev, ifr.ifr_flags);
723 case SIOCSIFADDR: /* Set interface address (and family) */
725 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
730 ifa = inet_alloc_ifa();
734 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
736 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
739 if (ifa->ifa_local == sin->sin_addr.s_addr)
741 inet_del_ifa(in_dev, ifap, 0);
742 ifa->ifa_broadcast = 0;
746 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
748 if (!(dev->flags & IFF_POINTOPOINT)) {
749 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
750 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
751 if ((dev->flags & IFF_BROADCAST) &&
752 ifa->ifa_prefixlen < 31)
753 ifa->ifa_broadcast = ifa->ifa_address |
756 ifa->ifa_prefixlen = 32;
757 ifa->ifa_mask = inet_make_mask(32);
759 ret = inet_set_ifa(dev, ifa);
762 case SIOCSIFBRDADDR: /* Set the broadcast address */
764 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
765 inet_del_ifa(in_dev, ifap, 0);
766 ifa->ifa_broadcast = sin->sin_addr.s_addr;
767 inet_insert_ifa(ifa);
771 case SIOCSIFDSTADDR: /* Set the destination address */
773 if (ifa->ifa_address == sin->sin_addr.s_addr)
776 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
779 inet_del_ifa(in_dev, ifap, 0);
780 ifa->ifa_address = sin->sin_addr.s_addr;
781 inet_insert_ifa(ifa);
784 case SIOCSIFNETMASK: /* Set the netmask for the interface */
787 * The mask we set must be legal.
790 if (bad_mask(sin->sin_addr.s_addr, 0))
793 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
794 __be32 old_mask = ifa->ifa_mask;
795 inet_del_ifa(in_dev, ifap, 0);
796 ifa->ifa_mask = sin->sin_addr.s_addr;
797 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
799 /* See if current broadcast address matches
800 * with current netmask, then recalculate
801 * the broadcast address. Otherwise it's a
802 * funny address, so don't touch it since
803 * the user seems to know what (s)he's doing...
805 if ((dev->flags & IFF_BROADCAST) &&
806 (ifa->ifa_prefixlen < 31) &&
807 (ifa->ifa_broadcast ==
808 (ifa->ifa_local|~old_mask))) {
809 ifa->ifa_broadcast = (ifa->ifa_local |
810 ~sin->sin_addr.s_addr);
812 inet_insert_ifa(ifa);
822 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
// gifconf callback registered for PF_INET by devinet_init(). Walks the
// addresses of dev's in_device and, for each one, builds a zeroed ifreq
// (name taken from the address label or from dev->name, family AF_INET plus
// the address) and copies it to the user buffer. The remaining len is
// compared with sizeof(ifr) before each copy; buf, len and done all move
// in steps of sizeof(struct ifreq).
826 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
828 struct in_device *in_dev = __in_dev_get_rtnl(dev);
829 struct in_ifaddr *ifa;
836 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
841 if (len < (int) sizeof(ifr))
843 memset(&ifr, 0, sizeof(struct ifreq));
845 strcpy(ifr.ifr_name, ifa->ifa_label);
847 strcpy(ifr.ifr_name, dev->name);
849 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
850 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
853 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
857 buf += sizeof(struct ifreq);
858 len -= sizeof(struct ifreq);
859 done += sizeof(struct ifreq);
// Pick a local source address (exported). The first pass looks at the
// primary addresses of dev under RCU: entries whose scope exceeds the
// requested scope are ignored, and the local address of an entry whose
// subnet contains dst (or any entry when dst is 0) is taken. The second
// pass iterates every device in dev's namespace and accepts a primary
// address whose scope is not RT_SCOPE_LINK and does not exceed scope.
865 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
868 struct in_device *in_dev;
869 struct net *net = dev_net(dev);
872 in_dev = __in_dev_get_rcu(dev);
876 for_primary_ifa(in_dev) {
877 if (ifa->ifa_scope > scope)
879 if (!dst || inet_ifa_match(dst, ifa)) {
880 addr = ifa->ifa_local;
884 addr = ifa->ifa_local;
885 } endfor_ifa(in_dev);
891 /* Not loopback addresses on loopback should be preferred
892 in this case. It is important that lo is the first interface
895 for_each_netdev_rcu(net, dev) {
896 in_dev = __in_dev_get_rcu(dev);
900 for_primary_ifa(in_dev) {
901 if (ifa->ifa_scope != RT_SCOPE_LINK &&
902 ifa->ifa_scope <= scope) {
903 addr = ifa->ifa_local;
906 } endfor_ifa(in_dev);
912 EXPORT_SYMBOL(inet_select_addr);
// Per-device worker for inet_confirm_addr(). Scans the addresses of in_dev
// for a local address compatible with dst, local and scope. A candidate
// becomes addr when it equals local (or local is 0) and its scope does not
// exceed scope. "same" records whether local and dst (each ignored when 0)
// fall in the subnet of the entry being examined. Returns addr only when
// same is set, otherwise 0.
914 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
915 __be32 local, int scope)
922 (local == ifa->ifa_local || !local) &&
923 ifa->ifa_scope <= scope) {
924 addr = ifa->ifa_local;
929 same = (!local || inet_ifa_match(local, ifa)) &&
930 (!dst || inet_ifa_match(dst, ifa));
934 /* Is the selected addr into dst subnet? */
935 if (inet_ifa_match(addr, ifa))
937 /* No, then can we use new local src? */
938 if (ifa->ifa_scope <= scope) {
939 addr = ifa->ifa_local;
942 /* search for large dst subnet for addr */
946 } endfor_ifa(in_dev);
948 return same ? addr : 0;
952 * Confirm that local IP address exists using wildcards:
953 * - in_dev: only on this interface, 0=any interface
954 * - dst: only in the same subnet as dst, 0=any dst
955 * - local: address, 0=autoselect the local address
956 * - scope: maximum allowed scope value for the local address
/*
 * Entry point for confirming a local address. When scope is not
 * RT_SCOPE_LINK the check is confined to in_dev and the result of
 * confirm_addr_indev() is returned directly. Otherwise every device in
 * in_dev's namespace is tried in turn under for_each_netdev_rcu().
 */
958 __be32 inet_confirm_addr(struct in_device *in_dev,
959 __be32 dst, __be32 local, int scope)
962 struct net_device *dev;
965 if (scope != RT_SCOPE_LINK)
966 return confirm_addr_indev(in_dev, dst, local, scope);
968 net = dev_net(in_dev->dev);
970 for_each_netdev_rcu(net, dev) {
971 in_dev = __in_dev_get_rcu(dev);
973 addr = confirm_addr_indev(in_dev, dst, local, scope);
/*
 * Add nb to the IPv4 address notifier chain (exported). Returns the result
 * of blocking_notifier_chain_register().
 */
987 int register_inetaddr_notifier(struct notifier_block *nb)
989 return blocking_notifier_chain_register(&inetaddr_chain, nb);
991 EXPORT_SYMBOL(register_inetaddr_notifier);
/*
 * Remove nb from the IPv4 address notifier chain (exported). Returns the
 * result of blocking_notifier_chain_unregister().
 */
993 int unregister_inetaddr_notifier(struct notifier_block *nb)
995 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
997 EXPORT_SYMBOL(unregister_inetaddr_notifier);
/*
 * For every address on in_dev: save the old label, overwrite the label with
 * dev->name, then locate the alias suffix (text from ':' onwards) in the
 * saved copy. On one path a ":%d" suffix is generated with sprintf(). The
 * suffix is appended with strcat() when name plus suffix fit in IFNAMSIZ,
 * and is otherwise written over the tail of the label. RTM_NEWADDR is sent
 * through rtmsg_ifa() for the relabelled address.
 */
999 /* Rename ifa_labels for a device name change. Make some effort to preserve
1000 * existing alias numbering and to create unique labels if possible.
1002 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1004 struct in_ifaddr *ifa;
1007 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1008 char old[IFNAMSIZ], *dot;
1010 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1011 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1014 dot = strchr(old, ':');
1016 sprintf(old, ":%d", named);
1019 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1020 strcat(ifa->ifa_label, dot);
1022 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1024 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
// Predicate on an MTU value. inetdev_event() uses it to decide whether IP
// may be enabled on, or must be removed from, a device.
1028 static inline bool inetdev_valid_mtu(unsigned mtu)
1033 /* Called only under RTNL semaphore */
/*
 * netdevice notifier callback; ptr is the net_device the event refers to.
 *
 * The first branch creates the in_device: on NETDEV_REGISTER via
 * inetdev_init() (failure is reported as -ENOMEM; loopback devices also get
 * NOXFRM and NOPOLICY set), and on NETDEV_CHANGEMTU with a valid MTU
 * ("Re-enabling IP").
 * NOTE(review): this branch is presumably only taken while the device has
 * no in_device yet -- confirm.
 *
 * Cases visible in the switch:
 *  - NETDEV_REGISTER here is logged as a bug and dev->ip_ptr is cleared.
 *  - A loopback device with a valid MTU gets INADDR_LOOPBACK with an 8-bit
 *    prefix and host scope inserted.
 *  - NETDEV_NOTIFY_PEERS / NETDEV_CHANGEADDR send a gratuitous ARP request
 *    when IN_DEV_ARP_NOTIFY is set.
 *  - The PRE/POST_TYPE_CHANGE events unmap and remap multicast state.
 *  - NETDEV_CHANGEMTU with an MTU that is no longer valid shares the
 *    NETDEV_UNREGISTER path and destroys the in_device.
 *  - NETDEV_CHANGENAME relabels the addresses and re-registers the sysctls.
 */
1035 static int inetdev_event(struct notifier_block *this, unsigned long event,
1038 struct net_device *dev = ptr;
1039 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1044 if (event == NETDEV_REGISTER) {
1045 in_dev = inetdev_init(dev);
1047 return notifier_from_errno(-ENOMEM);
1048 if (dev->flags & IFF_LOOPBACK) {
1049 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1050 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1052 } else if (event == NETDEV_CHANGEMTU) {
1053 /* Re-enabling IP */
1054 if (inetdev_valid_mtu(dev->mtu))
1055 in_dev = inetdev_init(dev);
1061 case NETDEV_REGISTER:
1062 printk(KERN_DEBUG "inetdev_event: bug\n");
1063 rcu_assign_pointer(dev->ip_ptr, NULL);
1066 if (!inetdev_valid_mtu(dev->mtu))
1068 if (dev->flags & IFF_LOOPBACK) {
1069 struct in_ifaddr *ifa = inet_alloc_ifa();
1073 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1074 ifa->ifa_prefixlen = 8;
1075 ifa->ifa_mask = inet_make_mask(8);
1076 in_dev_hold(in_dev);
1077 ifa->ifa_dev = in_dev;
1078 ifa->ifa_scope = RT_SCOPE_HOST;
1079 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1080 inet_insert_ifa(ifa);
1085 case NETDEV_NOTIFY_PEERS:
1086 case NETDEV_CHANGEADDR:
1087 /* Send gratuitous ARP to notify of link change */
1088 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1089 struct in_ifaddr *ifa = in_dev->ifa_list;
1092 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1093 ifa->ifa_address, dev,
1094 ifa->ifa_address, NULL,
1095 dev->dev_addr, NULL);
1101 case NETDEV_PRE_TYPE_CHANGE:
1102 ip_mc_unmap(in_dev);
1104 case NETDEV_POST_TYPE_CHANGE:
1105 ip_mc_remap(in_dev);
1107 case NETDEV_CHANGEMTU:
1108 if (inetdev_valid_mtu(dev->mtu))
1110 /* disable IP when MTU is not enough */
1111 case NETDEV_UNREGISTER:
1112 inetdev_destroy(in_dev);
1114 case NETDEV_CHANGENAME:
1115 /* Do not notify about label change, this event is
1116 * not interesting to applications using netlink.
1118 inetdev_changename(dev, in_dev);
1120 devinet_sysctl_unregister(in_dev);
1121 devinet_sysctl_register(in_dev);
// Notifier block that routes netdevice events to inetdev_event(); it is
// registered by devinet_init().
1128 static struct notifier_block ip_netdev_notifier = {
1129 .notifier_call = inetdev_event,
// Upper bound on the size of one address notification: the aligned
// ifaddrmsg header plus three 4-byte address attributes and a label of up
// to IFNAMSIZ bytes. rtmsg_ifa() sizes its skb with this value and treats
// -EMSGSIZE from inet_fill_ifaddr() as a bug here.
1132 static inline size_t inet_nlmsg_size(void)
1134 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1135 + nla_total_size(4) /* IFA_ADDRESS */
1136 + nla_total_size(4) /* IFA_LOCAL */
1137 + nla_total_size(4) /* IFA_BROADCAST */
1138 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Serialise ifa into a netlink message appended to skb. The ifaddrmsg
 * header carries family AF_INET, the prefix length, the flags (always with
 * IFA_F_PERMANENT or-ed in), the scope and the interface index. The
 * attributes that follow are IFA_ADDRESS, IFA_LOCAL, IFA_BROADCAST and
 * IFA_LABEL; address, broadcast and label are emitted only when set.
 * Returns nlmsg_end() on success; the failure path cancels the partially
 * built message with nlmsg_cancel().
 */
1141 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1142 u32 pid, u32 seq, int event, unsigned int flags)
1144 struct ifaddrmsg *ifm;
1145 struct nlmsghdr *nlh;
1147 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1151 ifm = nlmsg_data(nlh);
1152 ifm->ifa_family = AF_INET;
1153 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1154 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1155 ifm->ifa_scope = ifa->ifa_scope;
1156 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1158 if (ifa->ifa_address)
1159 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1162 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1164 if (ifa->ifa_broadcast)
1165 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1167 if (ifa->ifa_label[0])
1168 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1170 return nlmsg_end(skb, nlh);
1173 nlmsg_cancel(skb, nlh);
/*
 * RTM_GETADDR dump callback registered by devinet_init(). Iterates the
 * namespace's device index hash buckets under RCU and, per device, the
 * address list, emitting one RTM_NEWADDR / NLM_F_MULTI record per address
 * with inet_fill_ifaddr(). cb->args[1] and cb->args[2] hold the device and
 * address positions to resume from; a fill result <= 0 ends the pass, and
 * cb->args[2] is written back for the next call.
 */
1177 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1179 struct net *net = sock_net(skb->sk);
1182 int ip_idx, s_ip_idx;
1183 struct net_device *dev;
1184 struct in_device *in_dev;
1185 struct in_ifaddr *ifa;
1186 struct hlist_head *head;
1187 struct hlist_node *node;
1190 s_idx = idx = cb->args[1];
1191 s_ip_idx = ip_idx = cb->args[2];
1193 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1195 head = &net->dev_index_head[h];
1197 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1200 if (h > s_h || idx > s_idx)
1202 in_dev = __in_dev_get_rcu(dev);
1206 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1207 ifa = ifa->ifa_next, ip_idx++) {
1208 if (ip_idx < s_ip_idx)
1210 if (inet_fill_ifaddr(skb, ifa,
1211 NETLINK_CB(cb->skb).pid,
1213 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1227 cb->args[2] = ip_idx;
/*
 * Send an address event (RTM_NEWADDR or RTM_DELADDR) for ifa to the
 * RTNLGRP_IPV4_IFADDR multicast group. The sequence number is taken from
 * nlh when one is supplied, else 0. An skb of inet_nlmsg_size() bytes is
 * allocated, filled by inet_fill_ifaddr() and handed to rtnl_notify(). On
 * the error path the failure is recorded on the group's sockets with
 * rtnl_set_sk_err().
 */
1232 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1235 struct sk_buff *skb;
1236 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1240 net = dev_net(ifa->ifa_dev->dev);
1241 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1245 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1247 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1248 WARN_ON(err == -EMSGSIZE);
1252 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1256 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1259 #ifdef CONFIG_SYSCTL
/*
 * Propagate entry i of the namespace's default configuration to every
 * device in net whose in_device does not have bit i set in cnf.state.
 * That bit is set by devinet_conf_proc(), so entries marked there keep
 * their own value.
 */
1261 static void devinet_copy_dflt_conf(struct net *net, int i)
1263 struct net_device *dev;
1266 for_each_netdev_rcu(net, dev) {
1267 struct in_device *in_dev;
1269 in_dev = __in_dev_get_rcu(dev);
1270 if (in_dev && !test_bit(i, in_dev->cnf.state))
1271 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/*
 * Apply the namespace-wide "all" FORWARDING value: the "all"
 * ACCEPT_REDIRECTS becomes its inverse, the default FORWARDING follows it,
 * and each device's in_device gets FORWARDING set to the same value. LRO is
 * disabled on the devices along the way.
 */
1276 /* called with RTNL locked */
1277 static void inet_forward_change(struct net *net)
1279 struct net_device *dev;
1280 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1282 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1283 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1285 for_each_netdev(net, dev) {
1286 struct in_device *in_dev;
1288 dev_disable_lro(dev);
1290 in_dev = __in_dev_get_rcu(dev);
1292 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
/*
 * sysctl handler for the plain integer devconf entries. After
 * proc_dointvec() has run, the entry's index i is derived from the offset
 * of ctl->data within cnf->data and bit i is set in cnf->state. If cnf is
 * the namespace's default table the value is also pushed to devices with
 * devinet_copy_dflt_conf(). ctl->extra1 carries the ipv4_devconf and
 * ctl->extra2 the namespace.
 */
1297 static int devinet_conf_proc(ctl_table *ctl, int write,
1298 void __user *buffer,
1299 size_t *lenp, loff_t *ppos)
1301 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1304 struct ipv4_devconf *cnf = ctl->extra1;
1305 struct net *net = ctl->extra2;
1306 int i = (int *)ctl->data - cnf->data;
1308 set_bit(i, cnf->state);
1310 if (cnf == net->ipv4.devconf_dflt)
1311 devinet_copy_dflt_conf(net, i);
/*
 * sysctl handler for the "forwarding" entries and "ip_forward". After
 * proc_dointvec(), a write that changed the value is acted on. For any
 * entry other than the namespace default, RTNL is taken with
 * rtnl_trylock(); if that fails the syscall is restarted after the original
 * values have been restored. A change to the "all" entry is fanned out with
 * inet_forward_change(); a per-device change disables LRO on the device
 * owning the ipv4_devconf. The route cache is flushed afterwards.
 */
1317 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1318 void __user *buffer,
1319 size_t *lenp, loff_t *ppos)
1321 int *valp = ctl->data;
1324 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1326 if (write && *valp != val) {
1327 struct net *net = ctl->extra2;
1329 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1330 if (!rtnl_trylock()) {
1331 /* Restore the original values before restarting */
1334 return restart_syscall();
1336 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1337 inet_forward_change(net);
1339 struct ipv4_devconf *cnf = ctl->extra1;
1340 struct in_device *idev =
1341 container_of(cnf, struct in_device, cnf);
1342 dev_disable_lro(idev->dev);
1345 rt_cache_flush(net, 0);
/*
 * sysctl handler: runs proc_dointvec() and, when a write changed the stored
 * value, flushes the route cache of the namespace held in ctl->extra2.
 */
1352 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1353 void __user *buffer,
1354 size_t *lenp, loff_t *ppos)
1356 int *valp = ctl->data;
1358 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1359 struct net *net = ctl->extra2;
1361 if (write && *valp != val)
1362 rt_cache_flush(net, 0);
/*
 * Helpers for building the devinet_sysctl template entries.
 * DEVINET_SYSCTL_ENTRY points .data at slot IPV4_DEVCONF_<attr> - 1 of the
 * built-in ipv4_devconf, sets maxlen to sizeof(int) and stores the handler
 * and &ipv4_devconf (extra1). The RW and RO variants use devinet_conf_proc
 * with mode 0644 and 0444; COMPLEX takes a custom handler at mode 0644;
 * FLUSHING is COMPLEX with ipv4_doint_and_flush.
 */
1367 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1370 .data = ipv4_devconf.data + \
1371 IPV4_DEVCONF_ ## attr - 1, \
1372 .maxlen = sizeof(int), \
1374 .proc_handler = proc, \
1375 .extra1 = &ipv4_devconf, \
1378 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1379 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1381 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1382 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1384 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1385 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1387 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1388 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template sysctl table for one ipv4_devconf. __devinet_sysctl_register()
 * duplicates it with kmemdup() and rebases the entries onto the target
 * configuration. "forwarding" uses devinet_sysctl_forward, "mc_forwarding"
 * is read-only, and the last four entries flush the route cache on change.
 */
1390 static struct devinet_sysctl_table {
1391 struct ctl_table_header *sysctl_header;
1392 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1394 } devinet_sysctl = {
1396 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1397 devinet_sysctl_forward),
1398 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1400 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1401 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1402 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1403 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1404 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1405 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1406 "accept_source_route"),
1407 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1408 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1409 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1410 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1411 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1412 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1413 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1414 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1415 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1416 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1417 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1418 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1419 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1421 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1422 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1423 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1424 "force_igmp_version"),
1425 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1426 "promote_secondaries"),
/*
 * Register the sysctl directory net/ipv4/conf/<dev_name> for configuration
 * p. The devinet_sysctl template is duplicated; every entry's .data is
 * shifted by the byte offset between p and the built-in ipv4_devconf so it
 * addresses p's storage, extra1 is set to p and extra2 to net. dev_name is
 * duplicated with kstrdup() because sysctl treats .procname as constant
 * while the device may be renamed. The table is then registered with
 * register_net_sysctl_table().
 */
1430 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1431 struct ipv4_devconf *p)
1434 struct devinet_sysctl_table *t;
1436 #define DEVINET_CTL_PATH_DEV 3
1438 struct ctl_path devinet_ctl_path[] = {
1439 { .procname = "net", },
1440 { .procname = "ipv4", },
1441 { .procname = "conf", },
1442 { /* to be set */ },
1446 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1450 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1451 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1452 t->devinet_vars[i].extra1 = p;
1453 t->devinet_vars[i].extra2 = net;
1457 * Make a copy of dev_name, because '.procname' is regarded as const
1458 * by sysctl and we wouldn't want anyone to change it under our feet
1459 * (see SIOCSIFNAME).
1461 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1465 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1467 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1469 if (!t->sysctl_header)
/*
 * Undo __devinet_sysctl_register() for cnf: takes the table recorded in
 * cnf->sysctl and unregisters its sysctl header.
 */
1483 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1485 struct devinet_sysctl_table *t = cnf->sysctl;
1491 unregister_sysctl_table(t->sysctl_header);
/*
 * Register the sysctl entries of one device: the neighbour (ARP) parameters
 * under the "ipv4" name, and the devconf table under the device's name.
 */
1496 static void devinet_sysctl_register(struct in_device *idev)
1498 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1499 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/*
 * Counterpart of devinet_sysctl_register(): removes the devconf sysctl
 * table first, then the neighbour parameter entries.
 */
1503 static void devinet_sysctl_unregister(struct in_device *idev)
1505 __devinet_sysctl_unregister(&idev->cnf);
1506 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Template for the "ip_forward" sysctl. It addresses the FORWARDING slot
 * of the built-in ipv4_devconf, is handled by devinet_sysctl_forward and
 * is bound to init_net. devinet_init_net() duplicates and retargets it for
 * other namespaces.
 */
1509 static struct ctl_table ctl_forward_entry[] = {
1511 .procname = "ip_forward",
1512 .data = &ipv4_devconf.data[
1513 IPV4_DEVCONF_FORWARDING - 1],
1514 .maxlen = sizeof(int),
1516 .proc_handler = devinet_sysctl_forward,
1517 .extra1 = &ipv4_devconf,
1518 .extra2 = &init_net,
/*
 * sysctl path net/ipv4, under which devinet_init_net() registers the
 * "ip_forward" table.
 */
1523 static __net_initdata struct ctl_path net_ipv4_path[] = {
1524 { .procname = "net", },
1525 { .procname = "ipv4", },
/*
 * Per-namespace setup. init_net uses the built-in ipv4_devconf,
 * ipv4_devconf_dflt and ctl_forward_entry directly; any other namespace
 * gets kmemdup() copies, with the forward entry retargeted at the copied
 * "all" table and the new namespace. With CONFIG_SYSCTL the "all" and
 * "default" directories and the ip_forward table are registered. On
 * success the tables are stored in net->ipv4. The trailing statements are
 * the unwind path: they unregister what was registered and release only
 * copies, never the built-in objects.
 */
1530 static __net_init int devinet_init_net(struct net *net)
1533 struct ipv4_devconf *all, *dflt;
1534 #ifdef CONFIG_SYSCTL
1535 struct ctl_table *tbl = ctl_forward_entry;
1536 struct ctl_table_header *forw_hdr;
1540 all = &ipv4_devconf;
1541 dflt = &ipv4_devconf_dflt;
1543 if (!net_eq(net, &init_net)) {
1544 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1548 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1550 goto err_alloc_dflt;
1552 #ifdef CONFIG_SYSCTL
1553 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1557 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1558 tbl[0].extra1 = all;
1559 tbl[0].extra2 = net;
1563 #ifdef CONFIG_SYSCTL
1564 err = __devinet_sysctl_register(net, "all", all);
1568 err = __devinet_sysctl_register(net, "default", dflt);
1573 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1574 if (forw_hdr == NULL)
1576 net->ipv4.forw_hdr = forw_hdr;
1579 net->ipv4.devconf_all = all;
1580 net->ipv4.devconf_dflt = dflt;
1583 #ifdef CONFIG_SYSCTL
1585 __devinet_sysctl_unregister(dflt);
1587 __devinet_sysctl_unregister(all);
1589 if (tbl != ctl_forward_entry)
1593 if (dflt != &ipv4_devconf_dflt)
1596 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown. With CONFIG_SYSCTL the ip_forward table pointer
 * is fetched from the registered header before that header is
 * unregistered, then the "default" and "all" directories are removed.
 * Finally the namespace's devconf_dflt and devconf_all are passed to
 * kfree().
 */
1602 static __net_exit void devinet_exit_net(struct net *net)
1604 #ifdef CONFIG_SYSCTL
1605 struct ctl_table *tbl;
1607 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1608 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1609 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1610 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1613 kfree(net->ipv4.devconf_dflt);
1614 kfree(net->ipv4.devconf_all);
/*
 * Per-namespace hooks for this file; registered by devinet_init() with
 * register_pernet_subsys().
 */
1617 static __net_initdata struct pernet_operations devinet_ops = {
1618 .init = devinet_init_net,
1619 .exit = devinet_exit_net,
/*
 * Boot-time initialisation of IPv4 device support: registers the
 * per-namespace operations, the PF_INET gifconf callback, the netdevice
 * notifier, and the rtnetlink handlers for RTM_NEWADDR, RTM_DELADDR (doit)
 * and RTM_GETADDR (dump).
 */
1622 void __init devinet_init(void)
1624 register_pernet_subsys(&devinet_ops);
1626 register_gifconf(PF_INET, inet_gifconf);
1627 register_netdevice_notifier(&ip_netdev_notifier);
1629 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1630 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1631 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);