// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	NET3	IP device support routines.
 *
 *	Derived from the IP parts of dev.c 1.0.19
 *		Authors:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	Changes:
 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
 *					lists.
 *		Cyrus Durgin:		updated for kmod
 *		Matthias Andree:	in devinet_ioctl, compare label and
 *					address (4.4BSD alias style support),
 *					fall back to comparing just the label
 *					if no match found.
 */
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
52 #include <linux/sysctl.h>
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
/* ifa_flags that are meaningful only for IPv6 addresses; they are stripped
 * from any IPv4 address before it is inserted (see __inet_insert_ifa()).
 */
#define IPV6ONLY_FLAGS	\
		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
78 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
82 static struct ipv4_devconf ipv4_devconf_dflt = {
84 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
91 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
/* Access a field of the per-namespace default devconf. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 [IFA_LOCAL] = { .type = NLA_U32 },
100 [IFA_ADDRESS] = { .type = NLA_U32 },
101 [IFA_BROADCAST] = { .type = NLA_U32 },
102 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
104 [IFA_FLAGS] = { .type = NLA_U32 },
105 [IFA_RT_PRIORITY] = { .type = NLA_U32 },
106 [IFA_TARGET_NETNSID] = { .type = NLA_S32 },
107 [IFA_PROTO] = { .type = NLA_U8 },
110 struct inet_fill_args {
119 #define IN4_ADDR_HSIZE_SHIFT 8
120 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
126 u32 val = (__force u32) addr ^ net_hash_mix(net);
128 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
133 u32 hash = inet_addr_hash(net, ifa->ifa_local);
136 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
139 static void inet_hash_remove(struct in_ifaddr *ifa)
142 hlist_del_init_rcu(&ifa->hash);
146 * __ip_dev_find - find the first device with a given source address.
147 * @net: the net namespace
148 * @addr: the source address
149 * @devref: if true, take a reference on the found device
151 * If a caller uses devref=false, it should be protected by RCU, or RTNL
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
155 struct net_device *result = NULL;
156 struct in_ifaddr *ifa;
159 ifa = inet_lookup_ifaddr_rcu(net, addr);
161 struct flowi4 fl4 = { .daddr = addr };
162 struct fib_result res = { 0 };
163 struct fib_table *local;
165 /* Fallback to FIB local table so that communication
166 * over loopback subnets work.
168 local = fib_get_table(net, RT_TABLE_LOCAL);
170 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 res.type == RTN_LOCAL)
172 result = FIB_RES_DEV(res);
174 result = ifa->ifa_dev->dev;
176 if (result && devref)
181 EXPORT_SYMBOL(__ip_dev_find);
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
186 u32 hash = inet_addr_hash(net, addr);
187 struct in_ifaddr *ifa;
189 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 if (ifa->ifa_local == addr &&
191 net_eq(dev_net(ifa->ifa_dev->dev), net))
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 struct in_ifaddr __rcu **ifap,
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
208 static int devinet_sysctl_register(struct in_device *idev)
212 static void devinet_sysctl_unregister(struct in_device *idev)
217 /* Locks all the inet devices. */
219 static struct in_ifaddr *inet_alloc_ifa(void)
221 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
224 static void inet_rcu_free_ifa(struct rcu_head *head)
226 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
228 in_dev_put(ifa->ifa_dev);
232 static void inet_free_ifa(struct in_ifaddr *ifa)
234 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
237 void in_dev_finish_destroy(struct in_device *idev)
239 struct net_device *dev = idev->dev;
241 WARN_ON(idev->ifa_list);
242 WARN_ON(idev->mc_list);
243 kfree(rcu_dereference_protected(idev->mc_hash, 1));
244 #ifdef NET_REFCNT_DEBUG
245 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
247 netdev_put(dev, &idev->dev_tracker);
249 pr_err("Freeing alive in_device %p\n", idev);
253 EXPORT_SYMBOL(in_dev_finish_destroy);
255 static struct in_device *inetdev_init(struct net_device *dev)
257 struct in_device *in_dev;
262 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
265 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
266 sizeof(in_dev->cnf));
267 in_dev->cnf.sysctl = NULL;
269 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
270 if (!in_dev->arp_parms)
272 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
273 dev_disable_lro(dev);
274 /* Reference in_dev->dev */
275 netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
276 /* Account for reference dev->ip_ptr (below) */
277 refcount_set(&in_dev->refcnt, 1);
279 err = devinet_sysctl_register(in_dev);
282 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
287 ip_mc_init_dev(in_dev);
288 if (dev->flags & IFF_UP)
291 /* we can receive as soon as ip_ptr is set -- do this last */
292 rcu_assign_pointer(dev->ip_ptr, in_dev);
294 return in_dev ?: ERR_PTR(err);
301 static void in_dev_rcu_put(struct rcu_head *head)
303 struct in_device *idev = container_of(head, struct in_device, rcu_head);
307 static void inetdev_destroy(struct in_device *in_dev)
309 struct net_device *dev;
310 struct in_ifaddr *ifa;
318 ip_mc_destroy_dev(in_dev);
320 while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
321 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
325 RCU_INIT_POINTER(dev->ip_ptr, NULL);
327 devinet_sysctl_unregister(in_dev);
328 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
331 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
334 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 const struct in_ifaddr *ifa;
339 in_dev_for_each_ifa_rcu(ifa, in_dev) {
340 if (inet_ifa_match(a, ifa)) {
341 if (!b || inet_ifa_match(b, ifa)) {
351 static void __inet_del_ifa(struct in_device *in_dev,
352 struct in_ifaddr __rcu **ifap,
353 int destroy, struct nlmsghdr *nlh, u32 portid)
355 struct in_ifaddr *promote = NULL;
356 struct in_ifaddr *ifa, *ifa1;
357 struct in_ifaddr *last_prim;
358 struct in_ifaddr *prev_prom = NULL;
359 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
363 ifa1 = rtnl_dereference(*ifap);
364 last_prim = rtnl_dereference(in_dev->ifa_list);
368 /* 1. Deleting primary ifaddr forces deletion all secondaries
369 * unless alias promotion is set
372 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
373 struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375 while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
376 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
377 ifa1->ifa_scope <= ifa->ifa_scope)
380 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
381 ifa1->ifa_mask != ifa->ifa_mask ||
382 !inet_ifa_match(ifa1->ifa_address, ifa)) {
383 ifap1 = &ifa->ifa_next;
389 inet_hash_remove(ifa);
390 *ifap1 = ifa->ifa_next;
392 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
393 blocking_notifier_call_chain(&inetaddr_chain,
403 /* On promotion all secondaries from subnet are changing
404 * the primary IP, we must remove all their routes silently
405 * and later to add them back with new prefsrc. Do this
406 * while all addresses are on the device list.
408 for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
409 if (ifa1->ifa_mask == ifa->ifa_mask &&
410 inet_ifa_match(ifa1->ifa_address, ifa))
411 fib_del_ifaddr(ifa, ifa1);
417 *ifap = ifa1->ifa_next;
418 inet_hash_remove(ifa1);
420 /* 3. Announce address deletion */
422 /* Send message first, then call notifier.
423 At first sight, FIB update triggered by notifier
424 will refer to already deleted ifaddr, that could confuse
425 netlink listeners. It is not true: look, gated sees
426 that route deleted and if it still thinks that ifaddr
427 is valid, it will try to restore deleted routes... Grr.
428 So that, this order is correct.
430 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
431 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
434 struct in_ifaddr *next_sec;
436 next_sec = rtnl_dereference(promote->ifa_next);
438 struct in_ifaddr *last_sec;
440 rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442 last_sec = rtnl_dereference(last_prim->ifa_next);
443 rcu_assign_pointer(promote->ifa_next, last_sec);
444 rcu_assign_pointer(last_prim->ifa_next, promote);
447 promote->ifa_flags &= ~IFA_F_SECONDARY;
448 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
449 blocking_notifier_call_chain(&inetaddr_chain,
451 for (ifa = next_sec; ifa;
452 ifa = rtnl_dereference(ifa->ifa_next)) {
453 if (ifa1->ifa_mask != ifa->ifa_mask ||
454 !inet_ifa_match(ifa1->ifa_address, ifa))
464 static void inet_del_ifa(struct in_device *in_dev,
465 struct in_ifaddr __rcu **ifap,
468 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/* Deferred work that expires/deprecates addresses with finite lifetimes. */
static void check_lifetime(struct work_struct *work);

static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
476 u32 portid, struct netlink_ext_ack *extack)
478 struct in_ifaddr __rcu **last_primary, **ifap;
479 struct in_device *in_dev = ifa->ifa_dev;
480 struct in_validator_info ivi;
481 struct in_ifaddr *ifa1;
486 if (!ifa->ifa_local) {
491 ifa->ifa_flags &= ~IFA_F_SECONDARY;
492 last_primary = &in_dev->ifa_list;
494 /* Don't set IPv6 only flags to IPv4 addresses */
495 ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497 ifap = &in_dev->ifa_list;
498 ifa1 = rtnl_dereference(*ifap);
501 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
502 ifa->ifa_scope <= ifa1->ifa_scope)
503 last_primary = &ifa1->ifa_next;
504 if (ifa1->ifa_mask == ifa->ifa_mask &&
505 inet_ifa_match(ifa1->ifa_address, ifa)) {
506 if (ifa1->ifa_local == ifa->ifa_local) {
510 if (ifa1->ifa_scope != ifa->ifa_scope) {
514 ifa->ifa_flags |= IFA_F_SECONDARY;
517 ifap = &ifa1->ifa_next;
518 ifa1 = rtnl_dereference(*ifap);
521 /* Allow any devices that wish to register ifaddr validtors to weigh
522 * in now, before changes are committed. The rntl lock is serializing
523 * access here, so the state should not change between a validator call
524 * and a final notify on commit. This isn't invoked on promotion under
525 * the assumption that validators are checking the address itself, and
528 ivi.ivi_addr = ifa->ifa_address;
529 ivi.ivi_dev = ifa->ifa_dev;
531 ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
533 ret = notifier_to_errno(ret);
539 if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542 rcu_assign_pointer(ifa->ifa_next, *ifap);
543 rcu_assign_pointer(*ifap, ifa);
545 inet_hash_insert(dev_net(in_dev->dev), ifa);
547 cancel_delayed_work(&check_lifetime_work);
548 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
550 /* Send message first, then call notifier.
551 Notifier will trigger FIB update, so that
552 listeners of netlink will know about new ifaddr */
553 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
554 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
559 static int inet_insert_ifa(struct in_ifaddr *ifa)
561 return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
566 struct in_device *in_dev = __in_dev_get_rtnl(dev);
574 ipv4_devconf_setall(in_dev);
575 neigh_parms_data_state_setall(in_dev->arp_parms);
576 if (ifa->ifa_dev != in_dev) {
577 WARN_ON(ifa->ifa_dev);
579 ifa->ifa_dev = in_dev;
581 if (ipv4_is_loopback(ifa->ifa_local))
582 ifa->ifa_scope = RT_SCOPE_HOST;
583 return inet_insert_ifa(ifa);
586 /* Caller must hold RCU or RTNL :
587 * We dont take a reference on found in_device
589 struct in_device *inetdev_by_index(struct net *net, int ifindex)
591 struct net_device *dev;
592 struct in_device *in_dev = NULL;
595 dev = dev_get_by_index_rcu(net, ifindex);
597 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
601 EXPORT_SYMBOL(inetdev_by_index);
603 /* Called only from RTNL semaphored context. No locks. */
605 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608 struct in_ifaddr *ifa;
612 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
613 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
619 static int ip_mc_autojoin_config(struct net *net, bool join,
620 const struct in_ifaddr *ifa)
622 #if defined(CONFIG_IP_MULTICAST)
623 struct ip_mreqn mreq = {
624 .imr_multiaddr.s_addr = ifa->ifa_address,
625 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
627 struct sock *sk = net->ipv4.mc_autojoin_sk;
634 ret = ip_mc_join_group(sk, &mreq);
636 ret = ip_mc_leave_group(sk, &mreq);
645 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
646 struct netlink_ext_ack *extack)
648 struct net *net = sock_net(skb->sk);
649 struct in_ifaddr __rcu **ifap;
650 struct nlattr *tb[IFA_MAX+1];
651 struct in_device *in_dev;
652 struct ifaddrmsg *ifm;
653 struct in_ifaddr *ifa;
658 err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
659 ifa_ipv4_policy, extack);
663 ifm = nlmsg_data(nlh);
664 in_dev = inetdev_by_index(net, ifm->ifa_index);
670 for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
671 ifap = &ifa->ifa_next) {
673 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
676 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
679 if (tb[IFA_ADDRESS] &&
680 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
681 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
684 if (ipv4_is_multicast(ifa->ifa_address))
685 ip_mc_autojoin_config(net, false, ifa);
686 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690 err = -EADDRNOTAVAIL;
695 #define INFINITY_LIFE_TIME 0xFFFFFFFF
697 static void check_lifetime(struct work_struct *work)
699 unsigned long now, next, next_sec, next_sched;
700 struct in_ifaddr *ifa;
701 struct hlist_node *n;
705 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
707 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
708 bool change_needed = false;
711 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
714 if (ifa->ifa_flags & IFA_F_PERMANENT)
717 /* We try to batch several events at once. */
718 age = (now - ifa->ifa_tstamp +
719 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
721 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
722 age >= ifa->ifa_valid_lft) {
723 change_needed = true;
724 } else if (ifa->ifa_preferred_lft ==
725 INFINITY_LIFE_TIME) {
727 } else if (age >= ifa->ifa_preferred_lft) {
728 if (time_before(ifa->ifa_tstamp +
729 ifa->ifa_valid_lft * HZ, next))
730 next = ifa->ifa_tstamp +
731 ifa->ifa_valid_lft * HZ;
733 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
734 change_needed = true;
735 } else if (time_before(ifa->ifa_tstamp +
736 ifa->ifa_preferred_lft * HZ,
738 next = ifa->ifa_tstamp +
739 ifa->ifa_preferred_lft * HZ;
746 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
749 if (ifa->ifa_flags & IFA_F_PERMANENT)
752 /* We try to batch several events at once. */
753 age = (now - ifa->ifa_tstamp +
754 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
756 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
757 age >= ifa->ifa_valid_lft) {
758 struct in_ifaddr __rcu **ifap;
759 struct in_ifaddr *tmp;
761 ifap = &ifa->ifa_dev->ifa_list;
762 tmp = rtnl_dereference(*ifap);
765 inet_del_ifa(ifa->ifa_dev,
769 ifap = &tmp->ifa_next;
770 tmp = rtnl_dereference(*ifap);
772 } else if (ifa->ifa_preferred_lft !=
773 INFINITY_LIFE_TIME &&
774 age >= ifa->ifa_preferred_lft &&
775 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
776 ifa->ifa_flags |= IFA_F_DEPRECATED;
777 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
783 next_sec = round_jiffies_up(next);
786 /* If rounded timeout is accurate enough, accept it. */
787 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
788 next_sched = next_sec;
791 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
792 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
793 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
795 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
799 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
802 unsigned long timeout;
804 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
806 timeout = addrconf_timeout_fixup(valid_lft, HZ);
807 if (addrconf_finite_timeout(timeout))
808 ifa->ifa_valid_lft = timeout;
810 ifa->ifa_flags |= IFA_F_PERMANENT;
812 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
813 if (addrconf_finite_timeout(timeout)) {
815 ifa->ifa_flags |= IFA_F_DEPRECATED;
816 ifa->ifa_preferred_lft = timeout;
818 ifa->ifa_tstamp = jiffies;
819 if (!ifa->ifa_cstamp)
820 ifa->ifa_cstamp = ifa->ifa_tstamp;
823 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
824 __u32 *pvalid_lft, __u32 *pprefered_lft,
825 struct netlink_ext_ack *extack)
827 struct nlattr *tb[IFA_MAX+1];
828 struct in_ifaddr *ifa;
829 struct ifaddrmsg *ifm;
830 struct net_device *dev;
831 struct in_device *in_dev;
834 err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
835 ifa_ipv4_policy, extack);
839 ifm = nlmsg_data(nlh);
841 if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
844 dev = __dev_get_by_index(net, ifm->ifa_index);
849 in_dev = __in_dev_get_rtnl(dev);
854 ifa = inet_alloc_ifa();
857 * A potential indev allocation can be left alive, it stays
858 * assigned to its device and is destroy with it.
862 ipv4_devconf_setall(in_dev);
863 neigh_parms_data_state_setall(in_dev->arp_parms);
866 if (!tb[IFA_ADDRESS])
867 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
869 INIT_HLIST_NODE(&ifa->hash);
870 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
871 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
872 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
874 ifa->ifa_scope = ifm->ifa_scope;
875 ifa->ifa_dev = in_dev;
877 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
878 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
880 if (tb[IFA_BROADCAST])
881 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
884 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
886 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
888 if (tb[IFA_RT_PRIORITY])
889 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
892 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
894 if (tb[IFA_CACHEINFO]) {
895 struct ifa_cacheinfo *ci;
897 ci = nla_data(tb[IFA_CACHEINFO]);
898 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
902 *pvalid_lft = ci->ifa_valid;
903 *pprefered_lft = ci->ifa_prefered;
914 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
916 struct in_device *in_dev = ifa->ifa_dev;
917 struct in_ifaddr *ifa1;
922 in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
923 if (ifa1->ifa_mask == ifa->ifa_mask &&
924 inet_ifa_match(ifa1->ifa_address, ifa) &&
925 ifa1->ifa_local == ifa->ifa_local)
931 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
932 struct netlink_ext_ack *extack)
934 struct net *net = sock_net(skb->sk);
935 struct in_ifaddr *ifa;
936 struct in_ifaddr *ifa_existing;
937 __u32 valid_lft = INFINITY_LIFE_TIME;
938 __u32 prefered_lft = INFINITY_LIFE_TIME;
942 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
946 ifa_existing = find_matching_ifa(ifa);
948 /* It would be best to check for !NLM_F_CREATE here but
949 * userspace already relies on not having to provide this.
951 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
952 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
953 int ret = ip_mc_autojoin_config(net, true, ifa);
960 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
963 u32 new_metric = ifa->ifa_rt_priority;
967 if (nlh->nlmsg_flags & NLM_F_EXCL ||
968 !(nlh->nlmsg_flags & NLM_F_REPLACE))
972 if (ifa->ifa_rt_priority != new_metric) {
973 fib_modify_prefix_metric(ifa, new_metric);
974 ifa->ifa_rt_priority = new_metric;
977 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
978 cancel_delayed_work(&check_lifetime_work);
979 queue_delayed_work(system_power_efficient_wq,
980 &check_lifetime_work, 0);
981 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
987 * Determine a default network mask, based on the IP address.
990 static int inet_abc_len(__be32 addr)
992 int rc = -1; /* Something else, probably a multicast. */
994 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
997 __u32 haddr = ntohl(addr);
998 if (IN_CLASSA(haddr))
1000 else if (IN_CLASSB(haddr))
1002 else if (IN_CLASSC(haddr))
1004 else if (IN_CLASSE(haddr))
1012 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1014 struct sockaddr_in sin_orig;
1015 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1016 struct in_ifaddr __rcu **ifap = NULL;
1017 struct in_device *in_dev;
1018 struct in_ifaddr *ifa = NULL;
1019 struct net_device *dev;
1022 int tryaddrmatch = 0;
1024 ifr->ifr_name[IFNAMSIZ - 1] = 0;
1026 /* save original address for comparison */
1027 memcpy(&sin_orig, sin, sizeof(*sin));
1029 colon = strchr(ifr->ifr_name, ':');
1033 dev_load(net, ifr->ifr_name);
1036 case SIOCGIFADDR: /* Get interface address */
1037 case SIOCGIFBRDADDR: /* Get the broadcast address */
1038 case SIOCGIFDSTADDR: /* Get the destination address */
1039 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1040 /* Note that these ioctls will not sleep,
1041 so that we do not impose a lock.
1042 One day we will be forced to put shlock here (I mean SMP)
1044 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1045 memset(sin, 0, sizeof(*sin));
1046 sin->sin_family = AF_INET;
1051 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1054 case SIOCSIFADDR: /* Set interface address (and family) */
1055 case SIOCSIFBRDADDR: /* Set the broadcast address */
1056 case SIOCSIFDSTADDR: /* Set the destination address */
1057 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1059 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1062 if (sin->sin_family != AF_INET)
1073 dev = __dev_get_by_name(net, ifr->ifr_name);
1080 in_dev = __in_dev_get_rtnl(dev);
1083 /* Matthias Andree */
1084 /* compare label and address (4.4BSD style) */
1085 /* note: we only do this for a limited set of ioctls
1086 and only if the original address family was AF_INET.
1087 This is checked above. */
1089 for (ifap = &in_dev->ifa_list;
1090 (ifa = rtnl_dereference(*ifap)) != NULL;
1091 ifap = &ifa->ifa_next) {
1092 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1093 sin_orig.sin_addr.s_addr ==
1099 /* we didn't get a match, maybe the application is
1100 4.3BSD-style and passed in junk so we fall back to
1101 comparing just the label */
1103 for (ifap = &in_dev->ifa_list;
1104 (ifa = rtnl_dereference(*ifap)) != NULL;
1105 ifap = &ifa->ifa_next)
1106 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1111 ret = -EADDRNOTAVAIL;
1112 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1116 case SIOCGIFADDR: /* Get interface address */
1118 sin->sin_addr.s_addr = ifa->ifa_local;
1121 case SIOCGIFBRDADDR: /* Get the broadcast address */
1123 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1126 case SIOCGIFDSTADDR: /* Get the destination address */
1128 sin->sin_addr.s_addr = ifa->ifa_address;
1131 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1133 sin->sin_addr.s_addr = ifa->ifa_mask;
1138 ret = -EADDRNOTAVAIL;
1142 if (!(ifr->ifr_flags & IFF_UP))
1143 inet_del_ifa(in_dev, ifap, 1);
1146 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1149 case SIOCSIFADDR: /* Set interface address (and family) */
1151 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1156 ifa = inet_alloc_ifa();
1159 INIT_HLIST_NODE(&ifa->hash);
1161 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1163 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1166 if (ifa->ifa_local == sin->sin_addr.s_addr)
1168 inet_del_ifa(in_dev, ifap, 0);
1169 ifa->ifa_broadcast = 0;
1173 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1175 if (!(dev->flags & IFF_POINTOPOINT)) {
1176 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1177 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1178 if ((dev->flags & IFF_BROADCAST) &&
1179 ifa->ifa_prefixlen < 31)
1180 ifa->ifa_broadcast = ifa->ifa_address |
1183 ifa->ifa_prefixlen = 32;
1184 ifa->ifa_mask = inet_make_mask(32);
1186 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1187 ret = inet_set_ifa(dev, ifa);
1190 case SIOCSIFBRDADDR: /* Set the broadcast address */
1192 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1193 inet_del_ifa(in_dev, ifap, 0);
1194 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1195 inet_insert_ifa(ifa);
1199 case SIOCSIFDSTADDR: /* Set the destination address */
1201 if (ifa->ifa_address == sin->sin_addr.s_addr)
1204 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1207 inet_del_ifa(in_dev, ifap, 0);
1208 ifa->ifa_address = sin->sin_addr.s_addr;
1209 inet_insert_ifa(ifa);
1212 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1215 * The mask we set must be legal.
1218 if (bad_mask(sin->sin_addr.s_addr, 0))
1221 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1222 __be32 old_mask = ifa->ifa_mask;
1223 inet_del_ifa(in_dev, ifap, 0);
1224 ifa->ifa_mask = sin->sin_addr.s_addr;
1225 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1227 /* See if current broadcast address matches
1228 * with current netmask, then recalculate
1229 * the broadcast address. Otherwise it's a
1230 * funny address, so don't touch it since
1231 * the user seems to know what (s)he's doing...
1233 if ((dev->flags & IFF_BROADCAST) &&
1234 (ifa->ifa_prefixlen < 31) &&
1235 (ifa->ifa_broadcast ==
1236 (ifa->ifa_local|~old_mask))) {
1237 ifa->ifa_broadcast = (ifa->ifa_local |
1238 ~sin->sin_addr.s_addr);
1240 inet_insert_ifa(ifa);
1250 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1252 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1253 const struct in_ifaddr *ifa;
1257 if (WARN_ON(size > sizeof(struct ifreq)))
1263 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1270 memset(&ifr, 0, sizeof(struct ifreq));
1271 strcpy(ifr.ifr_name, ifa->ifa_label);
1273 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1274 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1277 if (copy_to_user(buf + done, &ifr, size)) {
1288 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1291 const struct in_ifaddr *ifa;
1293 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1294 if (ifa->ifa_flags & IFA_F_SECONDARY)
1296 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1297 ifa->ifa_scope <= scope)
1298 return ifa->ifa_local;
1304 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1306 const struct in_ifaddr *ifa;
1308 unsigned char localnet_scope = RT_SCOPE_HOST;
1309 struct in_device *in_dev;
1310 struct net *net = dev_net(dev);
1314 in_dev = __in_dev_get_rcu(dev);
1318 if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1319 localnet_scope = RT_SCOPE_LINK;
1321 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1322 if (ifa->ifa_flags & IFA_F_SECONDARY)
1324 if (min(ifa->ifa_scope, localnet_scope) > scope)
1326 if (!dst || inet_ifa_match(dst, ifa)) {
1327 addr = ifa->ifa_local;
1331 addr = ifa->ifa_local;
1337 master_idx = l3mdev_master_ifindex_rcu(dev);
1339 /* For VRFs, the VRF device takes the place of the loopback device,
1340 * with addresses on it being preferred. Note in such cases the
1341 * loopback device will be among the devices that fail the master_idx
1342 * equality check in the loop below.
1345 (dev = dev_get_by_index_rcu(net, master_idx)) &&
1346 (in_dev = __in_dev_get_rcu(dev))) {
1347 addr = in_dev_select_addr(in_dev, scope);
1352 /* Not loopback addresses on loopback should be preferred
1353 in this case. It is important that lo is the first interface
1356 for_each_netdev_rcu(net, dev) {
1357 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1360 in_dev = __in_dev_get_rcu(dev);
1364 addr = in_dev_select_addr(in_dev, scope);
1372 EXPORT_SYMBOL(inet_select_addr);
1374 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1375 __be32 local, int scope)
1377 unsigned char localnet_scope = RT_SCOPE_HOST;
1378 const struct in_ifaddr *ifa;
1382 if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1383 localnet_scope = RT_SCOPE_LINK;
1385 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1386 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1389 (local == ifa->ifa_local || !local) &&
1390 min_scope <= scope) {
1391 addr = ifa->ifa_local;
1396 same = (!local || inet_ifa_match(local, ifa)) &&
1397 (!dst || inet_ifa_match(dst, ifa));
1401 /* Is the selected addr into dst subnet? */
1402 if (inet_ifa_match(addr, ifa))
1404 /* No, then can we use new local src? */
1405 if (min_scope <= scope) {
1406 addr = ifa->ifa_local;
1409 /* search for large dst subnet for addr */
1415 return same ? addr : 0;
1419 * Confirm that local IP address exists using wildcards:
1420 * - net: netns to check, cannot be NULL
1421 * - in_dev: only on this interface, NULL=any interface
1422 * - dst: only in the same subnet as dst, 0=any dst
1423 * - local: address, 0=autoselect the local address
1424 * - scope: maximum allowed scope value for the local address
1426 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1427 __be32 dst, __be32 local, int scope)
1430 struct net_device *dev;
1433 return confirm_addr_indev(in_dev, dst, local, scope);
1436 for_each_netdev_rcu(net, dev) {
1437 in_dev = __in_dev_get_rcu(dev);
1439 addr = confirm_addr_indev(in_dev, dst, local, scope);
1448 EXPORT_SYMBOL(inet_confirm_addr);
1454 int register_inetaddr_notifier(struct notifier_block *nb)
1456 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1458 EXPORT_SYMBOL(register_inetaddr_notifier);
1460 int unregister_inetaddr_notifier(struct notifier_block *nb)
1462 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1464 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1466 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1468 return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1470 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
/* Remove @nb from the IPv4 address validator notifier chain. */
1472 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1474 return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1477 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1479 /* Rename ifa_labels for a device name change. Make some effort to preserve
1480 * existing alias numbering and to create unique labels if possible.
1482 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1484 struct in_ifaddr *ifa;
1487 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1488 char old[IFNAMSIZ], *dot;
/* Save the old label, then base the new one on the new device name. */
1490 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1491 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/* ':' marks an alias suffix in the old label ("eth0:1"). */
1494 dot = strchr(old, ':');
1496 sprintf(old, ":%d", named);
/* Re-attach the alias suffix, truncating the name if it won't fit. */
1499 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1500 strcat(ifa->ifa_label, dot);
1502 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
/* Announce the relabelled address over rtnetlink. */
1504 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1508 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1509 struct in_device *in_dev)
1512 const struct in_ifaddr *ifa;
1514 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1515 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1516 ifa->ifa_local, dev,
1517 ifa->ifa_local, NULL,
1518 dev->dev_addr, NULL);
1522 /* Called only under RTNL semaphore */
/* Netdevice notifier: keeps per-device IPv4 state in sync with device
 * lifecycle events (register/unregister, MTU, rename, address change).
 */
1524 static int inetdev_event(struct notifier_block *this, unsigned long event,
1527 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1528 struct in_device *in_dev = __in_dev_get_rtnl(dev);
/* No in_device yet: create one on REGISTER (or on CHANGEMTU when the
 * MTU becomes valid again). */
1533 if (event == NETDEV_REGISTER) {
1534 in_dev = inetdev_init(dev);
1536 return notifier_from_errno(PTR_ERR(in_dev));
1537 if (dev->flags & IFF_LOOPBACK) {
/* Loopback traffic bypasses xfrm transformation and policy. */
1538 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1539 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1541 } else if (event == NETDEV_CHANGEMTU) {
1542 /* Re-enabling IP */
1543 if (inetdev_valid_mtu(dev->mtu))
1544 in_dev = inetdev_init(dev);
1550 case NETDEV_REGISTER:
/* A second REGISTER with in_dev already set should not happen. */
1551 pr_debug("%s: bug\n", __func__);
1552 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1555 if (!inetdev_valid_mtu(dev->mtu))
/* Bringing up loopback auto-configures 127.0.0.1/8. */
1557 if (dev->flags & IFF_LOOPBACK) {
1558 struct in_ifaddr *ifa = inet_alloc_ifa();
1561 INIT_HLIST_NODE(&ifa->hash);
1563 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1564 ifa->ifa_prefixlen = 8;
1565 ifa->ifa_mask = inet_make_mask(8);
1566 in_dev_hold(in_dev);
1567 ifa->ifa_dev = in_dev;
1568 ifa->ifa_scope = RT_SCOPE_HOST;
1569 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1570 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1571 INFINITY_LIFE_TIME);
1572 ipv4_devconf_setall(in_dev);
1573 neigh_parms_data_state_setall(in_dev->arp_parms);
1574 inet_insert_ifa(ifa);
1579 case NETDEV_CHANGEADDR:
1580 if (!IN_DEV_ARP_NOTIFY(in_dev))
1583 case NETDEV_NOTIFY_PEERS:
1584 /* Send gratuitous ARP to notify of link change */
1585 inetdev_send_gratuitous_arp(dev, in_dev);
1590 case NETDEV_PRE_TYPE_CHANGE:
1591 ip_mc_unmap(in_dev);
1593 case NETDEV_POST_TYPE_CHANGE:
1594 ip_mc_remap(in_dev);
1596 case NETDEV_CHANGEMTU:
1597 if (inetdev_valid_mtu(dev->mtu))
1599 /* disable IP when MTU is not enough */
1601 case NETDEV_UNREGISTER:
1602 inetdev_destroy(in_dev);
1604 case NETDEV_CHANGENAME:
1605 /* Do not notify about label change, this event is
1606 * not interesting to applications using netlink.
1608 inetdev_changename(dev, in_dev);
/* Re-register sysctls so they appear under the new device name. */
1610 devinet_sysctl_unregister(in_dev);
1611 devinet_sysctl_register(in_dev);
/* Hook inetdev_event() into the netdevice notifier chain. */
1618 static struct notifier_block ip_netdev_notifier = {
1619 .notifier_call = inetdev_event,
1622 static size_t inet_nlmsg_size(void)
1624 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1625 + nla_total_size(4) /* IFA_ADDRESS */
1626 + nla_total_size(4) /* IFA_LOCAL */
1627 + nla_total_size(4) /* IFA_BROADCAST */
1628 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1629 + nla_total_size(4) /* IFA_FLAGS */
1630 + nla_total_size(1) /* IFA_PROTO */
1631 + nla_total_size(4) /* IFA_RT_PRIORITY */
1632 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1635 static inline u32 cstamp_delta(unsigned long cstamp)
1637 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1640 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1641 unsigned long tstamp, u32 preferred, u32 valid)
1643 struct ifa_cacheinfo ci;
1645 ci.cstamp = cstamp_delta(cstamp);
1646 ci.tstamp = cstamp_delta(tstamp);
1647 ci.ifa_prefered = preferred;
1648 ci.ifa_valid = valid;
1650 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/* Serialize one in_ifaddr into an RTM_*ADDR netlink message on @skb.
 * Returns 0 on success; on attribute overflow the partial message is
 * cancelled (nla_put_failure path).
 */
1653 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1654 struct inet_fill_args *args)
1656 struct ifaddrmsg *ifm;
1657 struct nlmsghdr *nlh;
1658 u32 preferred, valid;
1660 nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1665 ifm = nlmsg_data(nlh);
1666 ifm->ifa_family = AF_INET;
1667 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1668 ifm->ifa_flags = ifa->ifa_flags;
1669 ifm->ifa_scope = ifa->ifa_scope;
1670 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1672 if (args->netnsid >= 0 &&
1673 nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1674 goto nla_put_failure;
/* Non-permanent addresses report remaining lifetimes, adjusted by the
 * time elapsed since the last timestamp update. */
1676 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1677 preferred = ifa->ifa_preferred_lft;
1678 valid = ifa->ifa_valid_lft;
1679 if (preferred != INFINITY_LIFE_TIME) {
1680 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1682 if (preferred > tval)
1686 if (valid != INFINITY_LIFE_TIME) {
1694 preferred = INFINITY_LIFE_TIME;
1695 valid = INFINITY_LIFE_TIME;
/* Optional attributes are only emitted when set (nonzero). */
1697 if ((ifa->ifa_address &&
1698 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1700 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1701 (ifa->ifa_broadcast &&
1702 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1703 (ifa->ifa_label[0] &&
1704 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1706 nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1707 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1708 (ifa->ifa_rt_priority &&
1709 nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1710 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1712 goto nla_put_failure;
1714 nlmsg_end(skb, nlh);
1718 nlmsg_cancel(skb, nlh);
/* Strict-mode validation of an RTM_GETADDR dump request: reject stray
 * header fields, pick up an optional ifindex filter and target netns id,
 * and reject any other attribute.
 */
1722 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1723 struct inet_fill_args *fillargs,
1724 struct net **tgt_net, struct sock *sk,
1725 struct netlink_callback *cb)
1727 struct netlink_ext_ack *extack = cb->extack;
1728 struct nlattr *tb[IFA_MAX+1];
1729 struct ifaddrmsg *ifm;
1732 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1733 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1737 ifm = nlmsg_data(nlh);
/* Dump requests must leave the filter-irrelevant header fields zero. */
1738 if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1739 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1743 fillargs->ifindex = ifm->ifa_index;
1744 if (fillargs->ifindex) {
1745 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1746 fillargs->flags |= NLM_F_DUMP_FILTERED;
1749 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1750 ifa_ipv4_policy, extack);
1754 for (i = 0; i <= IFA_MAX; ++i) {
/* Only IFA_TARGET_NETNSID is accepted; it redirects the dump to
 * another namespace (capability-checked). */
1758 if (i == IFA_TARGET_NETNSID) {
1761 fillargs->netnsid = nla_get_s32(tb[i]);
1763 net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1765 fillargs->netnsid = -1;
1766 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1767 return PTR_ERR(net);
1771 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
/* Dump every address of one in_device into @skb, resuming after
 * s_ip_idx; the position reached is saved in cb->args[2].
 */
1779 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1780 struct netlink_callback *cb, int s_ip_idx,
1781 struct inet_fill_args *fillargs)
1783 struct in_ifaddr *ifa;
1787 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
/* Skip entries already delivered in an earlier dump pass. */
1788 if (ip_idx < s_ip_idx) {
1792 err = inet_fill_ifaddr(skb, ifa, fillargs);
1796 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* Record resume point for the next invocation. */
1802 cb->args[2] = ip_idx;
/* RTM_GETADDR dump handler: emit all IPv4 addresses, either for one
 * interface (ifindex filter) or for every device in the (possibly
 * redirected) target namespace, resuming from cb->args[].
 */
1807 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1809 const struct nlmsghdr *nlh = cb->nlh;
1810 struct inet_fill_args fillargs = {
1811 .portid = NETLINK_CB(cb->skb).portid,
1812 .seq = nlh->nlmsg_seq,
1813 .event = RTM_NEWADDR,
1814 .flags = NLM_F_MULTI,
1817 struct net *net = sock_net(skb->sk);
1818 struct net *tgt_net = net;
1822 struct net_device *dev;
1823 struct in_device *in_dev;
1824 struct hlist_head *head;
/* Resume state: device-hash bucket, device index, address index. */
1828 s_idx = idx = cb->args[1];
1829 s_ip_idx = cb->args[2];
1831 if (cb->strict_check) {
1832 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
/* Filtered dump: a single interface was requested. */
1838 if (fillargs.ifindex) {
1839 dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1845 in_dev = __in_dev_get_rtnl(dev);
1847 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
/* Full dump: iterate the per-namespace device hash table. */
1854 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1856 head = &tgt_net->dev_index_head[h];
/* Generation cookie lets userspace detect mid-dump changes. */
1858 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1859 tgt_net->dev_base_seq;
1860 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1863 if (h > s_h || idx > s_idx)
1865 in_dev = __in_dev_get_rcu(dev);
1869 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1885 if (fillargs.netnsid >= 0)
1888 return skb->len ? : err;
/* Broadcast an address add/delete (@event) for @ifa to the
 * RTNLGRP_IPV4_IFADDR multicast group.
 */
1891 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1894 struct inet_fill_args fillargs = {
/* Echo the triggering request's sequence number when one exists. */
1896 .seq = nlh ? nlh->nlmsg_seq : 0,
1901 struct sk_buff *skb;
1905 net = dev_net(ifa->ifa_dev->dev);
1906 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1910 err = inet_fill_ifaddr(skb, ifa, &fillargs);
1912 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1913 WARN_ON(err == -EMSGSIZE);
1917 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
/* On failure, flag the error to interested listeners. */
1921 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/* Size needed for the IFLA_INET_CONF payload in an IFLA_AF_SPEC nest. */
1924 static size_t inet_get_link_af_size(const struct net_device *dev,
1925 u32 ext_filter_mask)
1927 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1932 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/* Copy the device's entire ipv4_devconf array into an IFLA_INET_CONF
 * attribute on @skb.
 */
1935 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1936 u32 ext_filter_mask)
1938 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1945 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1949 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1950 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/* Policy for parsing the IFLA_AF_SPEC/AF_INET nest. */
1955 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1956 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/* Validate an AF_INET link attribute: the device must have IPv4 enabled
 * and every nested devconf id must be in range.
 */
1959 static int inet_validate_link_af(const struct net_device *dev,
1960 const struct nlattr *nla,
1961 struct netlink_ext_ack *extack)
1963 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1966 if (dev && !__in_dev_get_rtnl(dev))
1967 return -EAFNOSUPPORT;
1969 err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1970 inet_af_policy, extack);
1974 if (tb[IFLA_INET_CONF]) {
1975 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
/* The attribute type doubles as the devconf index (1-based). */
1976 int cfgid = nla_type(a);
1981 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/* Apply a previously validated AF_INET link attribute: write each nested
 * value into the device's ipv4 devconf table.
 */
1989 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1990 struct netlink_ext_ack *extack)
1992 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1993 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1997 return -EAFNOSUPPORT;
/* Re-parse without policy; inet_validate_link_af() ran earlier. */
1999 if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2002 if (tb[IFLA_INET_CONF]) {
2003 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2004 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/* Message size for a netconf notification carrying attribute @type,
 * or all attributes when type == NETCONFA_ALL.
 */
2010 static int inet_netconf_msgsize_devconf(int type)
2012 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2013 + nla_total_size(4); /* NETCONFA_IFINDEX */
2016 if (type == NETCONFA_ALL)
/* One 4-byte attribute per requested (or all) netconf value. */
2019 if (all || type == NETCONFA_FORWARDING)
2020 size += nla_total_size(4);
2021 if (all || type == NETCONFA_RP_FILTER)
2022 size += nla_total_size(4);
2023 if (all || type == NETCONFA_MC_FORWARDING)
2024 size += nla_total_size(4);
2025 if (all || type == NETCONFA_BC_FORWARDING)
2026 size += nla_total_size(4);
2027 if (all || type == NETCONFA_PROXY_NEIGH)
2028 size += nla_total_size(4);
2029 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2030 size += nla_total_size(4);
/* Build one RTM_*NETCONF message for @devconf on @skb, emitting the
 * attribute selected by @type (or every attribute for NETCONFA_ALL).
 */
2035 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2036 struct ipv4_devconf *devconf, u32 portid,
2037 u32 seq, int event, unsigned int flags,
2040 struct nlmsghdr *nlh;
2041 struct netconfmsg *ncm;
2044 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2049 if (type == NETCONFA_ALL)
2052 ncm = nlmsg_data(nlh);
2053 ncm->ncm_family = AF_INET;
2055 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2056 goto nla_put_failure;
2061 if ((all || type == NETCONFA_FORWARDING) &&
2062 nla_put_s32(skb, NETCONFA_FORWARDING,
2063 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2064 goto nla_put_failure;
2065 if ((all || type == NETCONFA_RP_FILTER) &&
2066 nla_put_s32(skb, NETCONFA_RP_FILTER,
2067 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2068 goto nla_put_failure;
2069 if ((all || type == NETCONFA_MC_FORWARDING) &&
2070 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2071 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2072 goto nla_put_failure;
2073 if ((all || type == NETCONFA_BC_FORWARDING) &&
2074 nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2075 IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2076 goto nla_put_failure;
/* NETCONFA_PROXY_NEIGH is backed by the PROXY_ARP devconf value. */
2077 if ((all || type == NETCONFA_PROXY_NEIGH) &&
2078 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2079 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2080 goto nla_put_failure;
2081 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2082 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2083 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2084 goto nla_put_failure;
2087 nlmsg_end(skb, nlh);
2091 nlmsg_cancel(skb, nlh);
/* Notify RTNLGRP_IPV4_NETCONF listeners that netconf attribute @type
 * changed for @ifindex in @net.
 */
2095 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2096 int ifindex, struct ipv4_devconf *devconf)
2098 struct sk_buff *skb;
2101 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2105 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2108 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2109 WARN_ON(err == -EMSGSIZE);
2113 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2117 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/* Attribute policy for RTM_GETNETCONF requests. */
2120 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2121 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
2122 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
2123 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
2124 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
2125 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
/* Parse/validate an RTM_GETNETCONF request. Non-strict sockets get a
 * plain parse; strict sockets additionally reject all attributes other
 * than NETCONFA_IFINDEX.
 */
2128 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2129 const struct nlmsghdr *nlh,
2131 struct netlink_ext_ack *extack)
2135 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2136 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2140 if (!netlink_strict_get_check(skb))
2141 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2143 devconf_ipv4_policy, extack);
2145 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2147 devconf_ipv4_policy, extack);
2151 for (i = 0; i <= NETCONFA_MAX; i++) {
2156 case NETCONFA_IFINDEX:
2159 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
/* RTM_GETNETCONF handler: look up the devconf selected by
 * NETCONFA_IFINDEX (a real ifindex, ALL, or DEFAULT) and unicast a
 * full netconf snapshot back to the requester.
 */
2167 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2168 struct nlmsghdr *nlh,
2169 struct netlink_ext_ack *extack)
2171 struct net *net = sock_net(in_skb->sk);
2172 struct nlattr *tb[NETCONFA_MAX+1];
2173 struct sk_buff *skb;
2174 struct ipv4_devconf *devconf;
2175 struct in_device *in_dev;
2176 struct net_device *dev;
2180 err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2185 if (!tb[NETCONFA_IFINDEX])
2188 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
/* Special ifindex values select the "all"/"default" tables. */
2190 case NETCONFA_IFINDEX_ALL:
2191 devconf = net->ipv4.devconf_all;
2193 case NETCONFA_IFINDEX_DEFAULT:
2194 devconf = net->ipv4.devconf_dflt;
2197 dev = __dev_get_by_index(net, ifindex);
2200 in_dev = __in_dev_get_rtnl(dev);
2203 devconf = &in_dev->cnf;
2208 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2212 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2213 NETLINK_CB(in_skb).portid,
2214 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2217 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2218 WARN_ON(err == -EMSGSIZE);
2222 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/* RTM_GETNETCONF dump handler: one netconf message per device, then
 * two extra virtual entries ("all" and "default"), resumable via
 * cb->args[]; buckets beyond NETDEV_HASHENTRIES encode the extras.
 */
2227 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2228 struct netlink_callback *cb)
2230 const struct nlmsghdr *nlh = cb->nlh;
2231 struct net *net = sock_net(skb->sk);
2234 struct net_device *dev;
2235 struct in_device *in_dev;
2236 struct hlist_head *head;
2238 if (cb->strict_check) {
2239 struct netlink_ext_ack *extack = cb->extack;
2240 struct netconfmsg *ncm;
2242 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2243 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2247 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2248 NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2254 s_idx = idx = cb->args[1];
2256 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2258 head = &net->dev_index_head[h];
2260 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2262 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2265 in_dev = __in_dev_get_rcu(dev);
2269 if (inet_netconf_fill_devconf(skb, dev->ifindex,
2271 NETLINK_CB(cb->skb).portid,
2275 NETCONFA_ALL) < 0) {
2279 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* Virtual bucket: the namespace-wide "all" configuration. */
2285 if (h == NETDEV_HASHENTRIES) {
2286 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2287 net->ipv4.devconf_all,
2288 NETLINK_CB(cb->skb).portid,
2290 RTM_NEWNETCONF, NLM_F_MULTI,
/* Virtual bucket: the template "default" configuration. */
2296 if (h == NETDEV_HASHENTRIES + 1) {
2297 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2298 net->ipv4.devconf_dflt,
2299 NETLINK_CB(cb->skb).portid,
2301 RTM_NEWNETCONF, NLM_F_MULTI,
2314 #ifdef CONFIG_SYSCTL
/* Propagate default-devconf entry @i to every device that has not
 * explicitly overridden it (tracked via the cnf.state bitmap).
 */
2316 static void devinet_copy_dflt_conf(struct net *net, int i)
2318 struct net_device *dev;
2321 for_each_netdev_rcu(net, dev) {
2322 struct in_device *in_dev;
2324 in_dev = __in_dev_get_rcu(dev);
2325 if (in_dev && !test_bit(i, in_dev->cnf.state))
2326 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2331 /* called with RTNL locked */
/* Apply a change of the "all" forwarding setting: mirror it into the
 * default table and every device, emitting netconf notifications.
 */
2332 static void inet_forward_change(struct net *net)
2334 struct net_device *dev;
2335 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
/* Routers (forwarding on) must not accept redirects. */
2337 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2338 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2339 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2340 NETCONFA_FORWARDING,
2341 NETCONFA_IFINDEX_ALL,
2342 net->ipv4.devconf_all);
2343 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2344 NETCONFA_FORWARDING,
2345 NETCONFA_IFINDEX_DEFAULT,
2346 net->ipv4.devconf_dflt);
2348 for_each_netdev(net, dev) {
2349 struct in_device *in_dev;
/* LRO is incompatible with forwarding received packets. */
2352 dev_disable_lro(dev);
2354 in_dev = __in_dev_get_rtnl(dev);
2356 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2357 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2358 NETCONFA_FORWARDING,
2359 dev->ifindex, &in_dev->cnf);
/* Map a devconf table back to the ifindex used in netconf messages:
 * DEFAULT, ALL, or the owning device's real ifindex.
 */
2364 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2366 if (cnf == net->ipv4.devconf_dflt)
2367 return NETCONFA_IFINDEX_DEFAULT;
2368 else if (cnf == net->ipv4.devconf_all)
2369 return NETCONFA_IFINDEX_ALL;
/* Per-device table: recover the in_device embedding it. */
2371 struct in_device *idev
2372 = container_of(cnf, struct in_device, cnf);
2373 return idev->dev->ifindex;
/* Generic sysctl handler for devconf entries: apply the write via
 * proc_dointvec, mark the entry as explicitly set, propagate defaults,
 * flush the route cache where needed, and emit netconf notifications
 * for the attributes userspace tracks.
 */
2377 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2378 void *buffer, size_t *lenp, loff_t *ppos)
2380 int old_value = *(int *)ctl->data;
2381 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2382 int new_value = *(int *)ctl->data;
2385 struct ipv4_devconf *cnf = ctl->extra1;
2386 struct net *net = ctl->extra2;
/* Recover the devconf index from the data pointer offset. */
2387 int i = (int *)ctl->data - cnf->data;
/* Remember that this entry was set explicitly (blocks dflt copy). */
2390 set_bit(i, cnf->state);
2392 if (cnf == net->ipv4.devconf_dflt)
2393 devinet_copy_dflt_conf(net, i)
2394 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2395 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2396 if ((new_value == 0) && (old_value != 0))
2397 rt_cache_flush(net);
2399 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2400 new_value != old_value)
2401 rt_cache_flush(net);
2403 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2404 new_value != old_value) {
2405 ifindex = devinet_conf_ifindex(net, cnf);
2406 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2410 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2411 new_value != old_value) {
2412 ifindex = devinet_conf_ifindex(net, cnf);
2413 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2414 NETCONFA_PROXY_NEIGH,
2417 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2418 new_value != old_value) {
2419 ifindex = devinet_conf_ifindex(net, cnf);
2420 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2421 NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
/* sysctl handler for the "forwarding" knobs: requires CAP_NET_ADMIN,
 * takes the RTNL (restarting the syscall if it would block), and fans
 * the change out via inet_forward_change() or a per-device notify.
 */
2429 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2430 void *buffer, size_t *lenp, loff_t *ppos)
2432 int *valp = ctl->data;
2435 struct net *net = ctl->extra2;
2438 if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2441 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2443 if (write && *valp != val) {
2444 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
/* Avoid sleeping on RTNL inside the sysctl write path. */
2445 if (!rtnl_trylock()) {
2446 /* Restore the original values before restarting */
2449 return restart_syscall();
2451 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2452 inet_forward_change(net);
2454 struct ipv4_devconf *cnf = ctl->extra1;
2455 struct in_device *idev =
2456 container_of(cnf, struct in_device, cnf);
2458 dev_disable_lro(idev->dev);
2459 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2460 NETCONFA_FORWARDING,
2465 rt_cache_flush(net);
/* Changing the default table needs no RTNL, only a notification. */
2467 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2468 NETCONFA_FORWARDING,
2469 NETCONFA_IFINDEX_DEFAULT,
2470 net->ipv4.devconf_dflt);
/* sysctl handler that flushes the IPv4 route cache whenever the value
 * actually changes.
 */
2476 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2477 void *buffer, size_t *lenp, loff_t *ppos)
2479 int *valp = ctl->data;
2481 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2482 struct net *net = ctl->extra2;
2484 if (write && *valp != val)
2485 rt_cache_flush(net);
/* Helpers to build one ctl_table entry per devconf field.  The entry's
 * data points into the template ipv4_devconf; __devinet_sysctl_register()
 * later rebases data/extra1/extra2 onto the actual per-net tables.
 */
2490 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2493 .data = ipv4_devconf.data + \
2494 IPV4_DEVCONF_ ## attr - 1, \
2495 .maxlen = sizeof(int), \
2497 .proc_handler = proc, \
2498 .extra1 = &ipv4_devconf, \
2501 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2502 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2504 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2505 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2507 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2508 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2510 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2511 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/* Template sysctl table for net/ipv4/conf/<dev>/*; duplicated and
 * rebased per device (and for "all"/"default") at registration time.
 */
2513 static struct devinet_sysctl_table {
2514 struct ctl_table_header *sysctl_header;
2515 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2516 } devinet_sysctl = {
2518 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2519 devinet_sysctl_forward),
2520 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2521 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2523 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2524 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2525 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2526 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2527 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2528 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2529 "accept_source_route"),
2530 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2531 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2532 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2533 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2534 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2535 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2536 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2537 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2538 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2539 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2540 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2541 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2542 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2543 "arp_evict_nocarrier"),
2544 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2545 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2546 "force_igmp_version"),
2547 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2548 "igmpv2_unsolicited_report_interval"),
2549 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2550 "igmpv3_unsolicited_report_interval"),
2551 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2552 "ignore_routes_with_linkdown"),
2553 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2554 "drop_gratuitous_arp"),
2556 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2557 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2558 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2559 "promote_secondaries"),
2560 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2562 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2563 "drop_unicast_in_l2_multicast"),
/* Clone the sysctl template, rebase each entry onto @p/@net, and
 * register it under net/ipv4/conf/<dev_name>; announces the full
 * netconf state on success.
 */
2567 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2568 int ifindex, struct ipv4_devconf *p)
2571 struct devinet_sysctl_table *t;
2572 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2574 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2578 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
/* Shift data pointers from the template table to @p's storage. */
2579 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2580 t->devinet_vars[i].extra1 = p;
2581 t->devinet_vars[i].extra2 = net;
2584 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2586 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2587 if (!t->sysctl_header)
2592 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
/* Tear down the sysctl table attached to @cnf and announce the
 * removal via RTM_DELNETCONF.
 */
2602 static void __devinet_sysctl_unregister(struct net *net,
2603 struct ipv4_devconf *cnf, int ifindex)
2605 struct devinet_sysctl_table *t = cnf->sysctl;
2609 unregister_net_sysctl_table(t->sysctl_header);
2613 inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
/* Register both the neighbour (ARP) and devconf sysctls for @idev,
 * rolling back the neighbour ones if devconf registration fails.
 */
2616 static int devinet_sysctl_register(struct in_device *idev)
/* Refuse device names that would produce unsafe sysctl paths. */
2620 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2623 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2626 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2627 idev->dev->ifindex, &idev->cnf);
2629 neigh_sysctl_unregister(idev->arp_parms);
2633 static void devinet_sysctl_unregister(struct in_device *idev)
2635 struct net *net = dev_net(idev->dev);
2637 __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2638 neigh_sysctl_unregister(idev->arp_parms);
/* Template for the global net/ipv4/ip_forward sysctl; data/extra1/extra2
 * are rebased per namespace in devinet_init_net().
 */
2641 static struct ctl_table ctl_forward_entry[] = {
2643 .procname = "ip_forward",
2644 .data = &ipv4_devconf.data[
2645 IPV4_DEVCONF_FORWARDING - 1],
2646 .maxlen = sizeof(int),
2648 .proc_handler = devinet_sysctl_forward,
2649 .extra1 = &ipv4_devconf,
2650 .extra2 = &init_net,
/* Per-namespace init: allocate the "all" and "default" devconf tables,
 * seed them according to sysctl_devconf_inherit_init_net, and register
 * the namespace's sysctls.
 */
2656 static __net_init int devinet_init_net(struct net *net)
2659 struct ipv4_devconf *all, *dflt;
2660 #ifdef CONFIG_SYSCTL
2661 struct ctl_table *tbl;
2662 struct ctl_table_header *forw_hdr;
2666 all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2670 dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2672 goto err_alloc_dflt;
2674 #ifdef CONFIG_SYSCTL
2675 tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
/* Rebase the ip_forward template onto this namespace's tables. */
2679 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2680 tbl[0].extra1 = all;
2681 tbl[0].extra2 = net;
2684 if (!net_eq(net, &init_net)) {
2685 if (IS_ENABLED(CONFIG_SYSCTL) &&
2686 sysctl_devconf_inherit_init_net == 3) {
2687 /* copy from the current netns */
2688 memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2689 sizeof(ipv4_devconf));
2691 current->nsproxy->net_ns->ipv4.devconf_dflt,
2692 sizeof(ipv4_devconf_dflt));
2693 } else if (!IS_ENABLED(CONFIG_SYSCTL) ||
2694 sysctl_devconf_inherit_init_net != 2) {
2695 /* inherit == 0 or 1: copy from init_net */
2696 memcpy(all, init_net.ipv4.devconf_all,
2697 sizeof(ipv4_devconf));
2698 memcpy(dflt, init_net.ipv4.devconf_dflt,
2699 sizeof(ipv4_devconf_dflt));
2701 /* else inherit == 2: use compiled values */
2704 #ifdef CONFIG_SYSCTL
2705 err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2709 err = __devinet_sysctl_register(net, "default",
2710 NETCONFA_IFINDEX_DEFAULT, dflt);
2715 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2718 net->ipv4.forw_hdr = forw_hdr;
2721 net->ipv4.devconf_all = all;
2722 net->ipv4.devconf_dflt = dflt;
/* Error unwinding: release in reverse order of registration. */
2725 #ifdef CONFIG_SYSCTL
2727 __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2729 __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
/* Per-namespace teardown: unregister the sysctls created in
 * devinet_init_net() and free the devconf tables.
 */
2741 static __net_exit void devinet_exit_net(struct net *net)
2743 #ifdef CONFIG_SYSCTL
2744 struct ctl_table *tbl;
2746 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2747 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2748 __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2749 NETCONFA_IFINDEX_DEFAULT);
2750 __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2751 NETCONFA_IFINDEX_ALL);
2754 kfree(net->ipv4.devconf_dflt);
2755 kfree(net->ipv4.devconf_all);
/* Per-network-namespace lifecycle hooks for IPv4 device config. */
2758 static __net_initdata struct pernet_operations devinet_ops = {
2759 .init = devinet_init_net,
2760 .exit = devinet_exit_net,
/* rtnetlink per-address-family hooks for the IFLA_AF_SPEC machinery. */
2763 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2765 .fill_link_af = inet_fill_link_af,
2766 .get_link_af_size = inet_get_link_af_size,
2767 .validate_link_af = inet_validate_link_af,
2768 .set_link_af = inet_set_link_af,
/* Boot-time setup: address hash table, pernet ops, netdev notifier,
 * lifetime expiry worker, and the rtnetlink handlers for addresses
 * and netconf.
 */
2771 void __init devinet_init(void)
2775 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2776 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2778 register_pernet_subsys(&devinet_ops);
2779 register_netdevice_notifier(&ip_netdev_notifier);
/* Kick off periodic address-lifetime expiry immediately. */
2781 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2783 rtnl_af_register(&inet_af_ops);
2785 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2786 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2787 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2788 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2789 inet_netconf_dump_devconf, 0);