Merge tag 'acpi-6.2-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael...
[platform/kernel/linux-starfive.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64
65 #define IPV6ONLY_FLAGS  \
66                 (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67                  IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68                  IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79         },
80 };
81
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83         .data = {
84                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92         },
93 };
94
/*
 * Apply IPV4_DEVCONF() to the devconf_dflt object hanging off @net's ipv4
 * state, selecting attribute @attr.
 *
 * @net is parenthesized before '->' so that callers may pass any
 * pointer-valued expression (e.g. "&init_net" or a conditional), not just
 * a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99         [IFA_LOCAL]             = { .type = NLA_U32 },
100         [IFA_ADDRESS]           = { .type = NLA_U32 },
101         [IFA_BROADCAST]         = { .type = NLA_U32 },
102         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
104         [IFA_FLAGS]             = { .type = NLA_U32 },
105         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
106         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
107         [IFA_PROTO]             = { .type = NLA_U8 },
108 };
109
/*
 * Parameter bundle; its users are not visible in this part of the file.
 * NOTE(review): presumably consumed by the netlink address fill/dump
 * helpers further down (portid/seq/event/flags look like nlmsg header
 * inputs) - confirm against those callers before relying on this.
 */
110 struct inet_fill_args {
111         u32 portid;
112         u32 seq;
113         int event;
114         unsigned int flags;
115         int netnsid;
116         int ifindex;
117 };
118
/*
 * Hash table of in_ifaddr entries (linked through in_ifaddr::hash).
 * The bucket index comes from inet_addr_hash(), which produces
 * IN4_ADDR_HSIZE_SHIFT bits, so the table has 1 << 8 = 256 buckets.
 */
119 #define IN4_ADDR_HSIZE_SHIFT    8
120 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
121
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126         u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133         u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135         ASSERT_RTNL();
136         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141         ASSERT_RTNL();
142         hlist_del_init_rcu(&ifa->hash);
143 }
144
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155         struct net_device *result = NULL;
156         struct in_ifaddr *ifa;
157
158         rcu_read_lock();
159         ifa = inet_lookup_ifaddr_rcu(net, addr);
160         if (!ifa) {
161                 struct flowi4 fl4 = { .daddr = addr };
162                 struct fib_result res = { 0 };
163                 struct fib_table *local;
164
165                 /* Fallback to FIB local table so that communication
166                  * over loopback subnets work.
167                  */
168                 local = fib_get_table(net, RT_TABLE_LOCAL);
169                 if (local &&
170                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171                     res.type == RTN_LOCAL)
172                         result = FIB_RES_DEV(res);
173         } else {
174                 result = ifa->ifa_dev->dev;
175         }
176         if (result && devref)
177                 dev_hold(result);
178         rcu_read_unlock();
179         return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186         u32 hash = inet_addr_hash(net, addr);
187         struct in_ifaddr *ifa;
188
189         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190                 if (ifa->ifa_local == addr &&
191                     net_eq(dev_net(ifa->ifa_dev->dev), net))
192                         return ifa;
193
194         return NULL;
195 }
196
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202                          struct in_ifaddr __rcu **ifap,
203                          int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210         return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216
217 /* Locks all the inet devices. */
218
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227         if (ifa->ifa_dev)
228                 in_dev_put(ifa->ifa_dev);
229         kfree(ifa);
230 }
231
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239         struct in_device *idev = container_of(head, struct in_device, rcu_head);
240
241         kfree(rcu_dereference_protected(idev->mc_hash, 1));
242         kfree(idev);
243 }
244
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247         struct net_device *dev = idev->dev;
248
249         WARN_ON(idev->ifa_list);
250         WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254         netdev_put(dev, &idev->dev_tracker);
255         if (!idev->dead)
256                 pr_err("Freeing alive in_device %p\n", idev);
257         else
258                 call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264         struct in_device *in_dev;
265         int err = -ENOMEM;
266
267         ASSERT_RTNL();
268
269         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270         if (!in_dev)
271                 goto out;
272         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273                         sizeof(in_dev->cnf));
274         in_dev->cnf.sysctl = NULL;
275         in_dev->dev = dev;
276         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277         if (!in_dev->arp_parms)
278                 goto out_kfree;
279         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280                 dev_disable_lro(dev);
281         /* Reference in_dev->dev */
282         netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283         /* Account for reference dev->ip_ptr (below) */
284         refcount_set(&in_dev->refcnt, 1);
285
286         err = devinet_sysctl_register(in_dev);
287         if (err) {
288                 in_dev->dead = 1;
289                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290                 in_dev_put(in_dev);
291                 in_dev = NULL;
292                 goto out;
293         }
294         ip_mc_init_dev(in_dev);
295         if (dev->flags & IFF_UP)
296                 ip_mc_up(in_dev);
297
298         /* we can receive as soon as ip_ptr is set -- do this last */
299         rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301         return in_dev ?: ERR_PTR(err);
302 out_kfree:
303         kfree(in_dev);
304         in_dev = NULL;
305         goto out;
306 }
307
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310         struct net_device *dev;
311         struct in_ifaddr *ifa;
312
313         ASSERT_RTNL();
314
315         dev = in_dev->dev;
316
317         in_dev->dead = 1;
318
319         ip_mc_destroy_dev(in_dev);
320
321         while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323                 inet_free_ifa(ifa);
324         }
325
326         RCU_INIT_POINTER(dev->ip_ptr, NULL);
327
328         devinet_sysctl_unregister(in_dev);
329         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330         arp_ifdown(dev);
331
332         in_dev_put(in_dev);
333 }
334
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337         const struct in_ifaddr *ifa;
338
339         rcu_read_lock();
340         in_dev_for_each_ifa_rcu(ifa, in_dev) {
341                 if (inet_ifa_match(a, ifa)) {
342                         if (!b || inet_ifa_match(b, ifa)) {
343                                 rcu_read_unlock();
344                                 return 1;
345                         }
346                 }
347         }
348         rcu_read_unlock();
349         return 0;
350 }
351
/*
 * Remove the address that *@ifap points at from @in_dev.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link (list head or a predecessor's ifa_next) pointing at the
 *           address to delete
 * @destroy: when non-zero the deleted address is also handed to
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * If the device is not dead and the address is a primary, its secondaries
 * (same mask, matching prefix) are either deleted as well, or - when
 * IN_DEV_PROMOTE_SECONDARIES is set - the first such secondary is promoted
 * to primary after the deletion has been announced.
 *
 * Asserts that RTNL is held.
 */
352 static void __inet_del_ifa(struct in_device *in_dev,
353                            struct in_ifaddr __rcu **ifap,
354                            int destroy, struct nlmsghdr *nlh, u32 portid)
355 {
356         struct in_ifaddr *promote = NULL;
357         struct in_ifaddr *ifa, *ifa1;
358         struct in_ifaddr *last_prim;
359         struct in_ifaddr *prev_prom = NULL;
360         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361
362         ASSERT_RTNL();
363
        /* ifa1 is the address being deleted; last_prim starts at the list head. */
364         ifa1 = rtnl_dereference(*ifap);
365         last_prim = rtnl_dereference(in_dev->ifa_list);
        /* A dead device is being torn down: skip all secondary handling. */
366         if (in_dev->dead)
367                 goto no_promotions;
368
369         /* 1. Deleting primary ifaddr forces deletion all secondaries
370          * unless alias promotion is set
371          **/
372
373         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374                 struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375
376                 while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
                        /* Track the last primary whose scope value is >= ifa1's. */
377                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378                             ifa1->ifa_scope <= ifa->ifa_scope)
379                                 last_prim = ifa;
380
                        /* Not a secondary of ifa1's subnet: step over it. */
381                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382                             ifa1->ifa_mask != ifa->ifa_mask ||
383                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
384                                 ifap1 = &ifa->ifa_next;
385                                 prev_prom = ifa;
386                                 continue;
387                         }
388
389                         if (!do_promote) {
                                /* No promotion: unlink, announce and free this secondary. */
390                                 inet_hash_remove(ifa);
391                                 *ifap1 = ifa->ifa_next;
392
393                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394                                 blocking_notifier_call_chain(&inetaddr_chain,
395                                                 NETDEV_DOWN, ifa);
396                                 inet_free_ifa(ifa);
397                         } else {
                                /* First matching secondary becomes the promotion candidate. */
398                                 promote = ifa;
399                                 break;
400                         }
401                 }
402         }
403
404         /* On promotion all secondaries from subnet are changing
405          * the primary IP, we must remove all their routes silently
406          * and later to add them back with new prefsrc. Do this
407          * while all addresses are on the device list.
408          */
409         for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410                 if (ifa1->ifa_mask == ifa->ifa_mask &&
411                     inet_ifa_match(ifa1->ifa_address, ifa))
412                         fib_del_ifaddr(ifa, ifa1);
413         }
414
415 no_promotions:
416         /* 2. Unlink it */
417
418         *ifap = ifa1->ifa_next;
419         inet_hash_remove(ifa1);
420
421         /* 3. Announce address deletion */
422
423         /* Send message first, then call notifier.
424            At first sight, FIB update triggered by notifier
425            will refer to already deleted ifaddr, that could confuse
426            netlink listeners. It is not true: look, gated sees
427            that route deleted and if it still thinks that ifaddr
428            is valid, it will try to restore deleted routes... Grr.
429            So that, this order is correct.
430          */
431         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433
434         if (promote) {
435                 struct in_ifaddr *next_sec;
436
437                 next_sec = rtnl_dereference(promote->ifa_next);
438                 if (prev_prom) {
439                         struct in_ifaddr *last_sec;
440
                        /* Move the promoted entry so it directly follows last_prim. */
441                         rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442
443                         last_sec = rtnl_dereference(last_prim->ifa_next);
444                         rcu_assign_pointer(promote->ifa_next, last_sec);
445                         rcu_assign_pointer(last_prim->ifa_next, promote);
446                 }
447
448                 promote->ifa_flags &= ~IFA_F_SECONDARY;
449                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450                 blocking_notifier_call_chain(&inetaddr_chain,
451                                 NETDEV_UP, promote);
                /* Re-add routes for the remaining secondaries of the subnet. */
452                 for (ifa = next_sec; ifa;
453                      ifa = rtnl_dereference(ifa->ifa_next)) {
454                         if (ifa1->ifa_mask != ifa->ifa_mask ||
455                             !inet_ifa_match(ifa1->ifa_address, ifa))
456                                         continue;
457                         fib_add_ifaddr(ifa);
458                 }
459
460         }
461         if (destroy)
462                 inet_free_ifa(ifa1);
463 }
464
465 static void inet_del_ifa(struct in_device *in_dev,
466                          struct in_ifaddr __rcu **ifap,
467                          int destroy)
468 {
469         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471
/* Forward declaration; the handler is defined further down in this file. */
472 static void check_lifetime(struct work_struct *work);
473
/*
 * Delayed work item running check_lifetime(). It is queued by
 * __inet_insert_ifa() and re-queued by check_lifetime() itself.
 */
474 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475
/*
 * Insert @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * @ifa:    address to insert; on every failure path below it is passed to
 *          inet_free_ifa(), so the caller must not touch it afterwards
 * @nlh, @portid: passed through to rtmsg_ifa() for the RTM_NEWADDR message
 * @extack: handed to the validator chain via struct in_validator_info
 *
 * Return: 0 on success (also when ifa_local is zero, in which case the
 * address is simply freed), -EEXIST if the same local address already
 * exists in the same subnet, -EINVAL if an address of the same subnet has
 * a different scope, or the errno produced by the validator chain.
 *
 * Asserts that RTNL is held.
 */
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477                              u32 portid, struct netlink_ext_ack *extack)
478 {
479         struct in_ifaddr __rcu **last_primary, **ifap;
480         struct in_device *in_dev = ifa->ifa_dev;
481         struct in_validator_info ivi;
482         struct in_ifaddr *ifa1;
483         int ret;
484
485         ASSERT_RTNL();
486
487         if (!ifa->ifa_local) {
488                 inet_free_ifa(ifa);
489                 return 0;
490         }
491
        /* Start as primary; the scan below may flag it secondary again. */
492         ifa->ifa_flags &= ~IFA_F_SECONDARY;
493         last_primary = &in_dev->ifa_list;
494
495         /* Don't set IPv6 only flags to IPv4 addresses */
496         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497
498         ifap = &in_dev->ifa_list;
499         ifa1 = rtnl_dereference(*ifap);
500
        /*
         * Walk the whole list: remember the insertion point for a primary
         * and detect duplicates / an existing address in the same subnet.
         * After the loop ifap points at the tail link.
         */
501         while (ifa1) {
502                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503                     ifa->ifa_scope <= ifa1->ifa_scope)
504                         last_primary = &ifa1->ifa_next;
505                 if (ifa1->ifa_mask == ifa->ifa_mask &&
506                     inet_ifa_match(ifa1->ifa_address, ifa)) {
507                         if (ifa1->ifa_local == ifa->ifa_local) {
508                                 inet_free_ifa(ifa);
509                                 return -EEXIST;
510                         }
511                         if (ifa1->ifa_scope != ifa->ifa_scope) {
512                                 inet_free_ifa(ifa);
513                                 return -EINVAL;
514                         }
515                         ifa->ifa_flags |= IFA_F_SECONDARY;
516                 }
517
518                 ifap = &ifa1->ifa_next;
519                 ifa1 = rtnl_dereference(*ifap);
520         }
521
522         /* Allow any devices that wish to register ifaddr validators to weigh
523          * in now, before changes are committed.  The rtnl lock is serializing
524          * access here, so the state should not change between a validator call
525          * and a final notify on commit.  This isn't invoked on promotion under
526          * the assumption that validators are checking the address itself, and
527          * not the flags.
528          */
529         ivi.ivi_addr = ifa->ifa_address;
530         ivi.ivi_dev = ifa->ifa_dev;
531         ivi.extack = extack;
532         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
533                                            NETDEV_UP, &ivi);
534         ret = notifier_to_errno(ret);
535         if (ret) {
536                 inet_free_ifa(ifa);
537                 return ret;
538         }
539
        /* Primaries go after the last suitable primary; secondaries at the tail. */
540         if (!(ifa->ifa_flags & IFA_F_SECONDARY))
541                 ifap = last_primary;
542
543         rcu_assign_pointer(ifa->ifa_next, *ifap);
544         rcu_assign_pointer(*ifap, ifa);
545
546         inet_hash_insert(dev_net(in_dev->dev), ifa);
547
        /* Restart the lifetime checker immediately (delay 0). */
548         cancel_delayed_work(&check_lifetime_work);
549         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
550
551         /* Send message first, then call notifier.
552            Notifier will trigger FIB update, so that
553            listeners of netlink will know about new ifaddr */
554         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
555         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
556
557         return 0;
558 }
559
560 static int inet_insert_ifa(struct in_ifaddr *ifa)
561 {
562         return __inet_insert_ifa(ifa, NULL, 0, NULL);
563 }
564
565 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
566 {
567         struct in_device *in_dev = __in_dev_get_rtnl(dev);
568
569         ASSERT_RTNL();
570
571         if (!in_dev) {
572                 inet_free_ifa(ifa);
573                 return -ENOBUFS;
574         }
575         ipv4_devconf_setall(in_dev);
576         neigh_parms_data_state_setall(in_dev->arp_parms);
577         if (ifa->ifa_dev != in_dev) {
578                 WARN_ON(ifa->ifa_dev);
579                 in_dev_hold(in_dev);
580                 ifa->ifa_dev = in_dev;
581         }
582         if (ipv4_is_loopback(ifa->ifa_local))
583                 ifa->ifa_scope = RT_SCOPE_HOST;
584         return inet_insert_ifa(ifa);
585 }
586
587 /* Caller must hold RCU or RTNL :
588  * We dont take a reference on found in_device
589  */
590 struct in_device *inetdev_by_index(struct net *net, int ifindex)
591 {
592         struct net_device *dev;
593         struct in_device *in_dev = NULL;
594
595         rcu_read_lock();
596         dev = dev_get_by_index_rcu(net, ifindex);
597         if (dev)
598                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
599         rcu_read_unlock();
600         return in_dev;
601 }
602 EXPORT_SYMBOL(inetdev_by_index);
603
604 /* Called only from RTNL semaphored context. No locks. */
605
606 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
607                                     __be32 mask)
608 {
609         struct in_ifaddr *ifa;
610
611         ASSERT_RTNL();
612
613         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
614                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
615                         return ifa;
616         }
617         return NULL;
618 }
619
620 static int ip_mc_autojoin_config(struct net *net, bool join,
621                                  const struct in_ifaddr *ifa)
622 {
623 #if defined(CONFIG_IP_MULTICAST)
624         struct ip_mreqn mreq = {
625                 .imr_multiaddr.s_addr = ifa->ifa_address,
626                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
627         };
628         struct sock *sk = net->ipv4.mc_autojoin_sk;
629         int ret;
630
631         ASSERT_RTNL();
632
633         lock_sock(sk);
634         if (join)
635                 ret = ip_mc_join_group(sk, &mreq);
636         else
637                 ret = ip_mc_leave_group(sk, &mreq);
638         release_sock(sk);
639
640         return ret;
641 #else
642         return -EOPNOTSUPP;
643 #endif
644 }
645
646 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
647                             struct netlink_ext_ack *extack)
648 {
649         struct net *net = sock_net(skb->sk);
650         struct in_ifaddr __rcu **ifap;
651         struct nlattr *tb[IFA_MAX+1];
652         struct in_device *in_dev;
653         struct ifaddrmsg *ifm;
654         struct in_ifaddr *ifa;
655         int err;
656
657         ASSERT_RTNL();
658
659         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
660                                      ifa_ipv4_policy, extack);
661         if (err < 0)
662                 goto errout;
663
664         ifm = nlmsg_data(nlh);
665         in_dev = inetdev_by_index(net, ifm->ifa_index);
666         if (!in_dev) {
667                 err = -ENODEV;
668                 goto errout;
669         }
670
671         for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
672              ifap = &ifa->ifa_next) {
673                 if (tb[IFA_LOCAL] &&
674                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
675                         continue;
676
677                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
678                         continue;
679
680                 if (tb[IFA_ADDRESS] &&
681                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
682                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
683                         continue;
684
685                 if (ipv4_is_multicast(ifa->ifa_address))
686                         ip_mc_autojoin_config(net, false, ifa);
687                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
688                 return 0;
689         }
690
691         err = -EADDRNOTAVAIL;
692 errout:
693         return err;
694 }
695
/* Lifetime value meaning "never expires" in the comparisons below. */
696 #define INFINITY_LIFE_TIME      0xFFFFFFFF
697
/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Every bucket of inet_addr_lst is scanned twice: first under
 * rcu_read_lock() only, to decide whether anything in the bucket needs
 * changing and to compute the next wake-up time; then, only if a change
 * is needed, again under rtnl_lock() to delete addresses whose valid
 * lifetime has elapsed and to flag addresses past their preferred
 * lifetime as IFA_F_DEPRECATED. Addresses with IFA_F_PERMANENT are
 * skipped. Finally the work item re-queues itself.
 */
698 static void check_lifetime(struct work_struct *work)
699 {
700         unsigned long now, next, next_sec, next_sched;
701         struct in_ifaddr *ifa;
702         struct hlist_node *n;
703         int i;
704
705         now = jiffies;
        /* Default wake-up if no address asks for an earlier one. */
706         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
707
708         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
709                 bool change_needed = false;
710
                /* Pass 1: read-only scan under RCU. */
711                 rcu_read_lock();
712                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
713                         unsigned long age;
714
715                         if (ifa->ifa_flags & IFA_F_PERMANENT)
716                                 continue;
717
718                         /* We try to batch several events at once. */
                        /* age is in seconds since ifa_tstamp (plus fuzz). */
719                         age = (now - ifa->ifa_tstamp +
720                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
721
722                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
723                             age >= ifa->ifa_valid_lft) {
                                /* Valid lifetime elapsed: needs deletion. */
724                                 change_needed = true;
725                         } else if (ifa->ifa_preferred_lft ==
726                                    INFINITY_LIFE_TIME) {
727                                 continue;
728                         } else if (age >= ifa->ifa_preferred_lft) {
                                /* Past preferred: next event is valid-lifetime expiry. */
729                                 if (time_before(ifa->ifa_tstamp +
730                                                 ifa->ifa_valid_lft * HZ, next))
731                                         next = ifa->ifa_tstamp +
732                                                ifa->ifa_valid_lft * HZ;
733
734                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
735                                         change_needed = true;
736                         } else if (time_before(ifa->ifa_tstamp +
737                                                ifa->ifa_preferred_lft * HZ,
738                                                next)) {
                                /* Still preferred: wake up when that ends. */
739                                 next = ifa->ifa_tstamp +
740                                        ifa->ifa_preferred_lft * HZ;
741                         }
742                 }
743                 rcu_read_unlock();
744                 if (!change_needed)
745                         continue;
                /* Pass 2: apply changes to this bucket under RTNL. */
746                 rtnl_lock();
747                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
748                         unsigned long age;
749
750                         if (ifa->ifa_flags & IFA_F_PERMANENT)
751                                 continue;
752
753                         /* We try to batch several events at once. */
754                         age = (now - ifa->ifa_tstamp +
755                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
756
757                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
758                             age >= ifa->ifa_valid_lft) {
759                                 struct in_ifaddr __rcu **ifap;
760                                 struct in_ifaddr *tmp;
761
                                /*
                                 * inet_del_ifa() needs the link pointing at
                                 * ifa, so locate it on the device list.
                                 */
762                                 ifap = &ifa->ifa_dev->ifa_list;
763                                 tmp = rtnl_dereference(*ifap);
764                                 while (tmp) {
765                                         if (tmp == ifa) {
766                                                 inet_del_ifa(ifa->ifa_dev,
767                                                              ifap, 1);
768                                                 break;
769                                         }
770                                         ifap = &tmp->ifa_next;
771                                         tmp = rtnl_dereference(*ifap);
772                                 }
773                         } else if (ifa->ifa_preferred_lft !=
774                                    INFINITY_LIFE_TIME &&
775                                    age >= ifa->ifa_preferred_lft &&
776                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
                                /* Newly deprecated: flag it and announce. */
777                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
778                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
779                         }
780                 }
781                 rtnl_unlock();
782         }
783
784         next_sec = round_jiffies_up(next);
785         next_sched = next;
786
787         /* If rounded timeout is accurate enough, accept it. */
788         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
789                 next_sched = next_sec;
790
791         now = jiffies;
792         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
793         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
794                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
795
796         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
797                         next_sched - now);
798 }
799
800 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
801                              __u32 prefered_lft)
802 {
803         unsigned long timeout;
804
805         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
806
807         timeout = addrconf_timeout_fixup(valid_lft, HZ);
808         if (addrconf_finite_timeout(timeout))
809                 ifa->ifa_valid_lft = timeout;
810         else
811                 ifa->ifa_flags |= IFA_F_PERMANENT;
812
813         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
814         if (addrconf_finite_timeout(timeout)) {
815                 if (timeout == 0)
816                         ifa->ifa_flags |= IFA_F_DEPRECATED;
817                 ifa->ifa_preferred_lft = timeout;
818         }
819         ifa->ifa_tstamp = jiffies;
820         if (!ifa->ifa_cstamp)
821                 ifa->ifa_cstamp = ifa->ifa_tstamp;
822 }
823
824 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
825                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
826                                        struct netlink_ext_ack *extack)
827 {
828         struct nlattr *tb[IFA_MAX+1];
829         struct in_ifaddr *ifa;
830         struct ifaddrmsg *ifm;
831         struct net_device *dev;
832         struct in_device *in_dev;
833         int err;
834
835         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
836                                      ifa_ipv4_policy, extack);
837         if (err < 0)
838                 goto errout;
839
840         ifm = nlmsg_data(nlh);
841         err = -EINVAL;
842         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
843                 goto errout;
844
845         dev = __dev_get_by_index(net, ifm->ifa_index);
846         err = -ENODEV;
847         if (!dev)
848                 goto errout;
849
850         in_dev = __in_dev_get_rtnl(dev);
851         err = -ENOBUFS;
852         if (!in_dev)
853                 goto errout;
854
855         ifa = inet_alloc_ifa();
856         if (!ifa)
857                 /*
858                  * A potential indev allocation can be left alive, it stays
859                  * assigned to its device and is destroy with it.
860                  */
861                 goto errout;
862
863         ipv4_devconf_setall(in_dev);
864         neigh_parms_data_state_setall(in_dev->arp_parms);
865         in_dev_hold(in_dev);
866
867         if (!tb[IFA_ADDRESS])
868                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
869
870         INIT_HLIST_NODE(&ifa->hash);
871         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
872         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
873         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
874                                          ifm->ifa_flags;
875         ifa->ifa_scope = ifm->ifa_scope;
876         ifa->ifa_dev = in_dev;
877
878         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
879         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
880
881         if (tb[IFA_BROADCAST])
882                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
883
884         if (tb[IFA_LABEL])
885                 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
886         else
887                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
888
889         if (tb[IFA_RT_PRIORITY])
890                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
891
892         if (tb[IFA_PROTO])
893                 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
894
895         if (tb[IFA_CACHEINFO]) {
896                 struct ifa_cacheinfo *ci;
897
898                 ci = nla_data(tb[IFA_CACHEINFO]);
899                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
900                         err = -EINVAL;
901                         goto errout_free;
902                 }
903                 *pvalid_lft = ci->ifa_valid;
904                 *pprefered_lft = ci->ifa_prefered;
905         }
906
907         return ifa;
908
909 errout_free:
910         inet_free_ifa(ifa);
911 errout:
912         return ERR_PTR(err);
913 }
914
915 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
916 {
917         struct in_device *in_dev = ifa->ifa_dev;
918         struct in_ifaddr *ifa1;
919
920         if (!ifa->ifa_local)
921                 return NULL;
922
923         in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
924                 if (ifa1->ifa_mask == ifa->ifa_mask &&
925                     inet_ifa_match(ifa1->ifa_address, ifa) &&
926                     ifa1->ifa_local == ifa->ifa_local)
927                         return ifa1;
928         }
929         return NULL;
930 }
931
932 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
933                             struct netlink_ext_ack *extack)
934 {
935         struct net *net = sock_net(skb->sk);
936         struct in_ifaddr *ifa;
937         struct in_ifaddr *ifa_existing;
938         __u32 valid_lft = INFINITY_LIFE_TIME;
939         __u32 prefered_lft = INFINITY_LIFE_TIME;
940
941         ASSERT_RTNL();
942
943         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
944         if (IS_ERR(ifa))
945                 return PTR_ERR(ifa);
946
947         ifa_existing = find_matching_ifa(ifa);
948         if (!ifa_existing) {
949                 /* It would be best to check for !NLM_F_CREATE here but
950                  * userspace already relies on not having to provide this.
951                  */
952                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
953                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
954                         int ret = ip_mc_autojoin_config(net, true, ifa);
955
956                         if (ret < 0) {
957                                 inet_free_ifa(ifa);
958                                 return ret;
959                         }
960                 }
961                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
962                                          extack);
963         } else {
964                 u32 new_metric = ifa->ifa_rt_priority;
965
966                 inet_free_ifa(ifa);
967
968                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
969                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
970                         return -EEXIST;
971                 ifa = ifa_existing;
972
973                 if (ifa->ifa_rt_priority != new_metric) {
974                         fib_modify_prefix_metric(ifa, new_metric);
975                         ifa->ifa_rt_priority = new_metric;
976                 }
977
978                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
979                 cancel_delayed_work(&check_lifetime_work);
980                 queue_delayed_work(system_power_efficient_wq,
981                                 &check_lifetime_work, 0);
982                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
983         }
984         return 0;
985 }
986
987 /*
988  *      Determine a default network mask, based on the IP address.
989  */
990
991 static int inet_abc_len(__be32 addr)
992 {
993         int rc = -1;    /* Something else, probably a multicast. */
994
995         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
996                 rc = 0;
997         else {
998                 __u32 haddr = ntohl(addr);
999                 if (IN_CLASSA(haddr))
1000                         rc = 8;
1001                 else if (IN_CLASSB(haddr))
1002                         rc = 16;
1003                 else if (IN_CLASSC(haddr))
1004                         rc = 24;
1005                 else if (IN_CLASSE(haddr))
1006                         rc = 32;
1007         }
1008
1009         return rc;
1010 }
1011
1012
/*
 * devinet_ioctl - handle the IPv4 address ioctls for one interface.
 * @net: namespace used for the device lookup and the capability check
 * @cmd: SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK},
 *       SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @ifr: request; ifr_name selects the device (optionally as "dev:alias")
 *       and ifr_addr carries the address in or out
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL        unknown @cmd, non-AF_INET address on a set request, or
 *                  an address/mask rejected by inet_abc_len()/bad_mask()
 *   -EPERM         set request without CAP_NET_ADMIN in net->user_ns
 *   -ENODEV        no device with that name
 *   -EADDRNOTAVAIL no matching address and @cmd is neither SIOCSIFADDR
 *                  nor SIOCSIFFLAGS (or SIOCSIFFLAGS on an unknown alias)
 *   -ENOBUFS       allocation of a new address failed
 * SIOCSIFFLAGS without an alias and SIOCSIFADDR return whatever
 * dev_change_flags() / inet_set_ifa() return.
 */
1013 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1014 {
1015         struct sockaddr_in sin_orig;
1016         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1017         struct in_ifaddr __rcu **ifap = NULL;
1018         struct in_device *in_dev;
1019         struct in_ifaddr *ifa = NULL;
1020         struct net_device *dev;
1021         char *colon;
1022         int ret = -EFAULT;
1023         int tryaddrmatch = 0;
1024
1025         ifr->ifr_name[IFNAMSIZ - 1] = 0;
1026
1027         /* save original address for comparison */
1028         memcpy(&sin_orig, sin, sizeof(*sin));
1029
             /* Temporarily cut off any ":alias" suffix so that the module
              * load and the device lookup below use the bare device name.
              */
1030         colon = strchr(ifr->ifr_name, ':');
1031         if (colon)
1032                 *colon = 0;
1033
1034         dev_load(net, ifr->ifr_name);
1035
             /* First pass: per-command validation, done before taking RTNL. */
1036         switch (cmd) {
1037         case SIOCGIFADDR:       /* Get interface address */
1038         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1039         case SIOCGIFDSTADDR:    /* Get the destination address */
1040         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1041                 /* Note that these ioctls will not sleep,
1042                    so that we do not impose a lock.
1043                    One day we will be forced to put shlock here (I mean SMP)
1044                  */
                     /* The reply buffer is cleared here; the address the
                      * caller passed in survives only in sin_orig.
                      */
1045                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1046                 memset(sin, 0, sizeof(*sin));
1047                 sin->sin_family = AF_INET;
1048                 break;
1049
1050         case SIOCSIFFLAGS:
1051                 ret = -EPERM;
1052                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1053                         goto out;
1054                 break;
1055         case SIOCSIFADDR:       /* Set interface address (and family) */
1056         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1057         case SIOCSIFDSTADDR:    /* Set the destination address */
1058         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1059                 ret = -EPERM;
1060                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1061                         goto out;
1062                 ret = -EINVAL;
1063                 if (sin->sin_family != AF_INET)
1064                         goto out;
1065                 break;
1066         default:
1067                 ret = -EINVAL;
1068                 goto out;
1069         }
1070
1071         rtnl_lock();
1072
1073         ret = -ENODEV;
1074         dev = __dev_get_by_name(net, ifr->ifr_name);
1075         if (!dev)
1076                 goto done;
1077
             /* Put the ':' back: the label comparisons below need the full
              * "dev:alias" string.
              */
1078         if (colon)
1079                 *colon = ':';
1080
1081         in_dev = __in_dev_get_rtnl(dev);
1082         if (in_dev) {
1083                 if (tryaddrmatch) {
1084                         /* Matthias Andree */
1085                         /* compare label and address (4.4BSD style) */
1086                         /* note: we only do this for a limited set of ioctls
1087                            and only if the original address family was AF_INET.
1088                            This is checked above. */
1089
                             /* On a hit, ifap is left pointing at the link
                              * that references ifa; it is later handed to
                              * inet_del_ifa().
                              */
1090                         for (ifap = &in_dev->ifa_list;
1091                              (ifa = rtnl_dereference(*ifap)) != NULL;
1092                              ifap = &ifa->ifa_next) {
1093                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1094                                     sin_orig.sin_addr.s_addr ==
1095                                                         ifa->ifa_local) {
1096                                         break; /* found */
1097                                 }
1098                         }
1099                 }
1100                 /* we didn't get a match, maybe the application is
1101                    4.3BSD-style and passed in junk so we fall back to
1102                    comparing just the label */
1103                 if (!ifa) {
1104                         for (ifap = &in_dev->ifa_list;
1105                              (ifa = rtnl_dereference(*ifap)) != NULL;
1106                              ifap = &ifa->ifa_next)
1107                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1108                                         break;
1109                 }
1110         }
1111
             /* Only SIOCSIFADDR (may create an address) and SIOCSIFFLAGS
              * (may act on the device itself) can proceed without a match.
              */
1112         ret = -EADDRNOTAVAIL;
1113         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1114                 goto done;
1115
             /* Second pass: perform the request under RTNL. */
1116         switch (cmd) {
1117         case SIOCGIFADDR:       /* Get interface address */
1118                 ret = 0;
1119                 sin->sin_addr.s_addr = ifa->ifa_local;
1120                 break;
1121
1122         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1123                 ret = 0;
1124                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1125                 break;
1126
1127         case SIOCGIFDSTADDR:    /* Get the destination address */
1128                 ret = 0;
1129                 sin->sin_addr.s_addr = ifa->ifa_address;
1130                 break;
1131
1132         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1133                 ret = 0;
1134                 sin->sin_addr.s_addr = ifa->ifa_mask;
1135                 break;
1136
1137         case SIOCSIFFLAGS:
                     /* With an alias label the request targets one address:
                      * clearing IFF_UP deletes it, anything else is a no-op.
                      * Without an alias the device flags are changed.
                      */
1138                 if (colon) {
1139                         ret = -EADDRNOTAVAIL;
1140                         if (!ifa)
1141                                 break;
1142                         ret = 0;
1143                         if (!(ifr->ifr_flags & IFF_UP))
1144                                 inet_del_ifa(in_dev, ifap, 1);
1145                         break;
1146                 }
1147                 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1148                 break;
1149
1150         case SIOCSIFADDR:       /* Set interface address (and family) */
1151                 ret = -EINVAL;
1152                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1153                         break;
1154
1155                 if (!ifa) {
1156                         ret = -ENOBUFS;
1157                         ifa = inet_alloc_ifa();
1158                         if (!ifa)
1159                                 break;
1160                         INIT_HLIST_NODE(&ifa->hash);
1161                         if (colon)
1162                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1163                         else
1164                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1165                 } else {
1166                         ret = 0;
1167                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1168                                 break;
                             /* Take the entry off the list, modify it and
                              * hand it to inet_set_ifa() below.
                              * NOTE(review): the final 0 presumably tells
                              * inet_del_ifa() to keep the object for reuse;
                              * confirm against its definition.
                              */
1169                         inet_del_ifa(in_dev, ifap, 0);
1170                         ifa->ifa_broadcast = 0;
1171                         ifa->ifa_scope = 0;
1172                 }
1173
1174                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1175
                     /* Non point-to-point devices get the classful default
                      * mask (and a derived broadcast address on broadcast
                      * devices); point-to-point devices get a /32.
                      */
1176                 if (!(dev->flags & IFF_POINTOPOINT)) {
1177                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1178                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1179                         if ((dev->flags & IFF_BROADCAST) &&
1180                             ifa->ifa_prefixlen < 31)
1181                                 ifa->ifa_broadcast = ifa->ifa_address |
1182                                                      ~ifa->ifa_mask;
1183                 } else {
1184                         ifa->ifa_prefixlen = 32;
1185                         ifa->ifa_mask = inet_make_mask(32);
1186                 }
1187                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1188                 ret = inet_set_ifa(dev, ifa);
1189                 break;
1190
1191         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1192                 ret = 0;
1193                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1194                         inet_del_ifa(in_dev, ifap, 0);
1195                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1196                         inet_insert_ifa(ifa);
1197                 }
1198                 break;
1199
1200         case SIOCSIFDSTADDR:    /* Set the destination address */
1201                 ret = 0;
1202                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1203                         break;
1204                 ret = -EINVAL;
1205                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1206                         break;
1207                 ret = 0;
1208                 inet_del_ifa(in_dev, ifap, 0);
1209                 ifa->ifa_address = sin->sin_addr.s_addr;
1210                 inet_insert_ifa(ifa);
1211                 break;
1212
1213         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1214
1215                 /*
1216                  *      The mask we set must be legal.
1217                  */
1218                 ret = -EINVAL;
1219                 if (bad_mask(sin->sin_addr.s_addr, 0))
1220                         break;
1221                 ret = 0;
1222                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1223                         __be32 old_mask = ifa->ifa_mask;
1224                         inet_del_ifa(in_dev, ifap, 0);
1225                         ifa->ifa_mask = sin->sin_addr.s_addr;
1226                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1227
1228                         /* See if current broadcast address matches
1229                          * with current netmask, then recalculate
1230                          * the broadcast address. Otherwise it's a
1231                          * funny address, so don't touch it since
1232                          * the user seems to know what (s)he's doing...
1233                          */
1234                         if ((dev->flags & IFF_BROADCAST) &&
1235                             (ifa->ifa_prefixlen < 31) &&
1236                             (ifa->ifa_broadcast ==
1237                              (ifa->ifa_local|~old_mask))) {
1238                                 ifa->ifa_broadcast = (ifa->ifa_local |
1239                                                       ~sin->sin_addr.s_addr);
1240                         }
1241                         inet_insert_ifa(ifa);
1242                 }
1243                 break;
1244         }
1245 done:
1246         rtnl_unlock();
1247 out:
1248         return ret;
1249 }
1250
1251 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1252 {
1253         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1254         const struct in_ifaddr *ifa;
1255         struct ifreq ifr;
1256         int done = 0;
1257
1258         if (WARN_ON(size > sizeof(struct ifreq)))
1259                 goto out;
1260
1261         if (!in_dev)
1262                 goto out;
1263
1264         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1265                 if (!buf) {
1266                         done += size;
1267                         continue;
1268                 }
1269                 if (len < size)
1270                         break;
1271                 memset(&ifr, 0, sizeof(struct ifreq));
1272                 strcpy(ifr.ifr_name, ifa->ifa_label);
1273
1274                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1275                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1276                                                                 ifa->ifa_local;
1277
1278                 if (copy_to_user(buf + done, &ifr, size)) {
1279                         done = -EFAULT;
1280                         break;
1281                 }
1282                 len  -= size;
1283                 done += size;
1284         }
1285 out:
1286         return done;
1287 }
1288
1289 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1290                                  int scope)
1291 {
1292         const struct in_ifaddr *ifa;
1293
1294         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1295                 if (ifa->ifa_flags & IFA_F_SECONDARY)
1296                         continue;
1297                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1298                     ifa->ifa_scope <= scope)
1299                         return ifa->ifa_local;
1300         }
1301
1302         return 0;
1303 }
1304
1305 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1306 {
1307         const struct in_ifaddr *ifa;
1308         __be32 addr = 0;
1309         unsigned char localnet_scope = RT_SCOPE_HOST;
1310         struct in_device *in_dev;
1311         struct net *net = dev_net(dev);
1312         int master_idx;
1313
1314         rcu_read_lock();
1315         in_dev = __in_dev_get_rcu(dev);
1316         if (!in_dev)
1317                 goto no_in_dev;
1318
1319         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1320                 localnet_scope = RT_SCOPE_LINK;
1321
1322         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1323                 if (ifa->ifa_flags & IFA_F_SECONDARY)
1324                         continue;
1325                 if (min(ifa->ifa_scope, localnet_scope) > scope)
1326                         continue;
1327                 if (!dst || inet_ifa_match(dst, ifa)) {
1328                         addr = ifa->ifa_local;
1329                         break;
1330                 }
1331                 if (!addr)
1332                         addr = ifa->ifa_local;
1333         }
1334
1335         if (addr)
1336                 goto out_unlock;
1337 no_in_dev:
1338         master_idx = l3mdev_master_ifindex_rcu(dev);
1339
1340         /* For VRFs, the VRF device takes the place of the loopback device,
1341          * with addresses on it being preferred.  Note in such cases the
1342          * loopback device will be among the devices that fail the master_idx
1343          * equality check in the loop below.
1344          */
1345         if (master_idx &&
1346             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1347             (in_dev = __in_dev_get_rcu(dev))) {
1348                 addr = in_dev_select_addr(in_dev, scope);
1349                 if (addr)
1350                         goto out_unlock;
1351         }
1352
1353         /* Not loopback addresses on loopback should be preferred
1354            in this case. It is important that lo is the first interface
1355            in dev_base list.
1356          */
1357         for_each_netdev_rcu(net, dev) {
1358                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1359                         continue;
1360
1361                 in_dev = __in_dev_get_rcu(dev);
1362                 if (!in_dev)
1363                         continue;
1364
1365                 addr = in_dev_select_addr(in_dev, scope);
1366                 if (addr)
1367                         goto out_unlock;
1368         }
1369 out_unlock:
1370         rcu_read_unlock();
1371         return addr;
1372 }
1373 EXPORT_SYMBOL(inet_select_addr);
1374
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev while tracking two things:
 *   addr - the candidate answer: the first address equal to @local (or any
 *          address when @local is 0) whose scope, capped by localnet_scope,
 *          does not exceed @scope;
 *   same - set once an address is seen for which inet_ifa_match() accepts
 *          @local (if non-zero) and @dst (if non-zero).
 *
 * Returns addr when the walk ends with same set, 0 otherwise.
 */
1375 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1376                               __be32 local, int scope)
1377 {
1378         unsigned char localnet_scope = RT_SCOPE_HOST;
1379         const struct in_ifaddr *ifa;
1380         __be32 addr = 0;
1381         int same = 0;
1382
1383         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1384                 localnet_scope = RT_SCOPE_LINK;
1385
1386         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1387                 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1388
                     /* Pick the candidate address once; stop right away if
                      * an earlier entry already satisfied the match test.
                      */
1389                 if (!addr &&
1390                     (local == ifa->ifa_local || !local) &&
1391                     min_scope <= scope) {
1392                         addr = ifa->ifa_local;
1393                         if (same)
1394                                 break;
1395                 }
1396                 if (!same) {
1397                         same = (!local || inet_ifa_match(local, ifa)) &&
1398                                 (!dst || inet_ifa_match(dst, ifa));
1399                         if (same && addr) {
                                     /* With an explicit local, or with no dst
                                      * restriction, the candidate is final.
                                      */
1400                                 if (local || !dst)
1401                                         break;
1402                                 /* Is the selected addr into dst subnet? */
1403                                 if (inet_ifa_match(addr, ifa))
1404                                         break;
1405                                 /* No, then can we use new local src? */
1406                                 if (min_scope <= scope) {
1407                                         addr = ifa->ifa_local;
1408                                         break;
1409                                 }
1410                                 /* search for large dst subnet for addr */
1411                                 same = 0;
1412                         }
1413                 }
1414         }
1415
1416         return same ? addr : 0;
1417 }
1418
1419 /*
1420  * Confirm that local IP address exists using wildcards:
1421  * - net: netns to check, cannot be NULL
1422  * - in_dev: only on this interface, NULL=any interface
1423  * - dst: only in the same subnet as dst, 0=any dst
1424  * - local: address, 0=autoselect the local address
1425  * - scope: maximum allowed scope value for the local address
1426  */
1427 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1428                          __be32 dst, __be32 local, int scope)
1429 {
1430         __be32 addr = 0;
1431         struct net_device *dev;
1432
1433         if (in_dev)
1434                 return confirm_addr_indev(in_dev, dst, local, scope);
1435
1436         rcu_read_lock();
1437         for_each_netdev_rcu(net, dev) {
1438                 in_dev = __in_dev_get_rcu(dev);
1439                 if (in_dev) {
1440                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1441                         if (addr)
1442                                 break;
1443                 }
1444         }
1445         rcu_read_unlock();
1446
1447         return addr;
1448 }
1449 EXPORT_SYMBOL(inet_confirm_addr);
1450
1451 /*
1452  *      Device notifier
1453  */
1454
1455 int register_inetaddr_notifier(struct notifier_block *nb)
1456 {
1457         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1458 }
1459 EXPORT_SYMBOL(register_inetaddr_notifier);
1460
1461 int unregister_inetaddr_notifier(struct notifier_block *nb)
1462 {
1463         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1464 }
1465 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1466
1467 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1468 {
1469         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1470 }
1471 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1472
1473 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1474 {
1475         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1476             nb);
1477 }
1478 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1479
1480 /* Rename ifa_labels for a device name change. Make some effort to preserve
1481  * existing alias numbering and to create unique labels if possible.
1482 */
1483 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1484 {
1485         struct in_ifaddr *ifa;
1486         int named = 0;
1487
1488         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1489                 char old[IFNAMSIZ], *dot;
1490
1491                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1492                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1493                 if (named++ == 0)
1494                         goto skip;
1495                 dot = strchr(old, ':');
1496                 if (!dot) {
1497                         sprintf(old, ":%d", named);
1498                         dot = old;
1499                 }
1500                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1501                         strcat(ifa->ifa_label, dot);
1502                 else
1503                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1504 skip:
1505                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1506         }
1507 }
1508
1509 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1510                                         struct in_device *in_dev)
1511
1512 {
1513         const struct in_ifaddr *ifa;
1514
1515         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1516                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1517                          ifa->ifa_local, dev,
1518                          ifa->ifa_local, NULL,
1519                          dev->dev_addr, NULL);
1520         }
1521 }
1522
1523 /* Called only under RTNL semaphore */
1524
/*
 * Netdevice notifier callback maintaining the IPv4 state of a device.
 *
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Asserts that RTNL is held.  Returns NOTIFY_DONE, except when
 * inetdev_init() fails on NETDEV_REGISTER, in which case the error is
 * returned through notifier_from_errno().
 */
1525 static int inetdev_event(struct notifier_block *this, unsigned long event,
1526                          void *ptr)
1527 {
1528         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1529         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1530
1531         ASSERT_RTNL();
1532
             /* No IPv4 state yet: only NETDEV_REGISTER, or NETDEV_CHANGEMTU
              * to a valid MTU, creates it; every other event is ignored.
              */
1533         if (!in_dev) {
1534                 if (event == NETDEV_REGISTER) {
1535                         in_dev = inetdev_init(dev);
1536                         if (IS_ERR(in_dev))
1537                                 return notifier_from_errno(PTR_ERR(in_dev));
1538                         if (dev->flags & IFF_LOOPBACK) {
1539                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1540                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1541                         }
1542                 } else if (event == NETDEV_CHANGEMTU) {
1543                         /* Re-enabling IP */
1544                         if (inetdev_valid_mtu(dev->mtu))
1545                                 in_dev = inetdev_init(dev);
1546                 }
1547                 goto out;
1548         }
1549
1550         switch (event) {
1551         case NETDEV_REGISTER:
                     /* Registering a device that already has an in_device
                      * is unexpected: log it and clear dev->ip_ptr.
                      */
1552                 pr_debug("%s: bug\n", __func__);
1553                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1554                 break;
1555         case NETDEV_UP:
1556                 if (!inetdev_valid_mtu(dev->mtu))
1557                         break;
                     /* A loopback device coming up gets INADDR_LOOPBACK/8
                      * with host scope and an infinite lifetime.
                      */
1558                 if (dev->flags & IFF_LOOPBACK) {
1559                         struct in_ifaddr *ifa = inet_alloc_ifa();
1560
1561                         if (ifa) {
1562                                 INIT_HLIST_NODE(&ifa->hash);
1563                                 ifa->ifa_local =
1564                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1565                                 ifa->ifa_prefixlen = 8;
1566                                 ifa->ifa_mask = inet_make_mask(8);
1567                                 in_dev_hold(in_dev);
1568                                 ifa->ifa_dev = in_dev;
1569                                 ifa->ifa_scope = RT_SCOPE_HOST;
1570                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1571                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1572                                                  INFINITY_LIFE_TIME);
1573                                 ipv4_devconf_setall(in_dev);
1574                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1575                                 inet_insert_ifa(ifa);
1576                         }
1577                 }
1578                 ip_mc_up(in_dev);
1579                 fallthrough;
1580         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR only announce themselves when the
                      * ARP_NOTIFY setting is enabled for this device.
                      */
1581                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1582                         break;
1583                 fallthrough;
1584         case NETDEV_NOTIFY_PEERS:
1585                 /* Send gratuitous ARP to notify of link change */
1586                 inetdev_send_gratuitous_arp(dev, in_dev);
1587                 break;
1588         case NETDEV_DOWN:
1589                 ip_mc_down(in_dev);
1590                 break;
1591         case NETDEV_PRE_TYPE_CHANGE:
1592                 ip_mc_unmap(in_dev);
1593                 break;
1594         case NETDEV_POST_TYPE_CHANGE:
1595                 ip_mc_remap(in_dev);
1596                 break;
1597         case NETDEV_CHANGEMTU:
1598                 if (inetdev_valid_mtu(dev->mtu))
1599                         break;
1600                 /* disable IP when MTU is not enough */
1601                 fallthrough;
1602         case NETDEV_UNREGISTER:
1603                 inetdev_destroy(in_dev);
1604                 break;
1605         case NETDEV_CHANGENAME:
1606                 /* Do not notify about label change, this event is
1607                  * not interesting to applications using netlink.
1608                  */
                     /* NOTE(review): inetdev_changename() does call
                      * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
                      * comment above looks stale; confirm the intent.
                      */
1609                 inetdev_changename(dev, in_dev);
1610
                     /* Re-register the sysctl entries after the rename. */
1611                 devinet_sysctl_unregister(in_dev);
1612                 devinet_sysctl_register(in_dev);
1613                 break;
1614         }
1615 out:
1616         return NOTIFY_DONE;
1617 }
1618
/* Notifier block whose callback is inetdev_event(). */
1619 static struct notifier_block ip_netdev_notifier = {
1620         .notifier_call = inetdev_event,
1621 };
1622
1623 static size_t inet_nlmsg_size(void)
1624 {
1625         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1626                + nla_total_size(4) /* IFA_ADDRESS */
1627                + nla_total_size(4) /* IFA_LOCAL */
1628                + nla_total_size(4) /* IFA_BROADCAST */
1629                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1630                + nla_total_size(4)  /* IFA_FLAGS */
1631                + nla_total_size(1)  /* IFA_PROTO */
1632                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1633                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1634 }
1635
1636 static inline u32 cstamp_delta(unsigned long cstamp)
1637 {
1638         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1639 }
1640
1641 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1642                          unsigned long tstamp, u32 preferred, u32 valid)
1643 {
1644         struct ifa_cacheinfo ci;
1645
1646         ci.cstamp = cstamp_delta(cstamp);
1647         ci.tstamp = cstamp_delta(tstamp);
1648         ci.ifa_prefered = preferred;
1649         ci.ifa_valid = valid;
1650
1651         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1652 }
1653
1654 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1655                             struct inet_fill_args *args)
1656 {
1657         struct ifaddrmsg *ifm;
1658         struct nlmsghdr  *nlh;
1659         u32 preferred, valid;
1660
1661         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1662                         args->flags);
1663         if (!nlh)
1664                 return -EMSGSIZE;
1665
1666         ifm = nlmsg_data(nlh);
1667         ifm->ifa_family = AF_INET;
1668         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1669         ifm->ifa_flags = ifa->ifa_flags;
1670         ifm->ifa_scope = ifa->ifa_scope;
1671         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1672
1673         if (args->netnsid >= 0 &&
1674             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1675                 goto nla_put_failure;
1676
1677         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1678                 preferred = ifa->ifa_preferred_lft;
1679                 valid = ifa->ifa_valid_lft;
1680                 if (preferred != INFINITY_LIFE_TIME) {
1681                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1682
1683                         if (preferred > tval)
1684                                 preferred -= tval;
1685                         else
1686                                 preferred = 0;
1687                         if (valid != INFINITY_LIFE_TIME) {
1688                                 if (valid > tval)
1689                                         valid -= tval;
1690                                 else
1691                                         valid = 0;
1692                         }
1693                 }
1694         } else {
1695                 preferred = INFINITY_LIFE_TIME;
1696                 valid = INFINITY_LIFE_TIME;
1697         }
1698         if ((ifa->ifa_address &&
1699              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1700             (ifa->ifa_local &&
1701              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1702             (ifa->ifa_broadcast &&
1703              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1704             (ifa->ifa_label[0] &&
1705              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1706             (ifa->ifa_proto &&
1707              nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1708             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1709             (ifa->ifa_rt_priority &&
1710              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1711             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1712                           preferred, valid))
1713                 goto nla_put_failure;
1714
1715         nlmsg_end(skb, nlh);
1716         return 0;
1717
1718 nla_put_failure:
1719         nlmsg_cancel(skb, nlh);
1720         return -EMSGSIZE;
1721 }
1722
1723 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1724                                       struct inet_fill_args *fillargs,
1725                                       struct net **tgt_net, struct sock *sk,
1726                                       struct netlink_callback *cb)
1727 {
1728         struct netlink_ext_ack *extack = cb->extack;
1729         struct nlattr *tb[IFA_MAX+1];
1730         struct ifaddrmsg *ifm;
1731         int err, i;
1732
1733         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1734                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1735                 return -EINVAL;
1736         }
1737
1738         ifm = nlmsg_data(nlh);
1739         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1740                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1741                 return -EINVAL;
1742         }
1743
1744         fillargs->ifindex = ifm->ifa_index;
1745         if (fillargs->ifindex) {
1746                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1747                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1748         }
1749
1750         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1751                                             ifa_ipv4_policy, extack);
1752         if (err < 0)
1753                 return err;
1754
1755         for (i = 0; i <= IFA_MAX; ++i) {
1756                 if (!tb[i])
1757                         continue;
1758
1759                 if (i == IFA_TARGET_NETNSID) {
1760                         struct net *net;
1761
1762                         fillargs->netnsid = nla_get_s32(tb[i]);
1763
1764                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1765                         if (IS_ERR(net)) {
1766                                 fillargs->netnsid = -1;
1767                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1768                                 return PTR_ERR(net);
1769                         }
1770                         *tgt_net = net;
1771                 } else {
1772                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1773                         return -EINVAL;
1774                 }
1775         }
1776
1777         return 0;
1778 }
1779
1780 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1781                             struct netlink_callback *cb, int s_ip_idx,
1782                             struct inet_fill_args *fillargs)
1783 {
1784         struct in_ifaddr *ifa;
1785         int ip_idx = 0;
1786         int err;
1787
1788         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1789                 if (ip_idx < s_ip_idx) {
1790                         ip_idx++;
1791                         continue;
1792                 }
1793                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1794                 if (err < 0)
1795                         goto done;
1796
1797                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1798                 ip_idx++;
1799         }
1800         err = 0;
1801
1802 done:
1803         cb->args[2] = ip_idx;
1804
1805         return err;
1806 }
1807
1808 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1809 {
1810         const struct nlmsghdr *nlh = cb->nlh;
1811         struct inet_fill_args fillargs = {
1812                 .portid = NETLINK_CB(cb->skb).portid,
1813                 .seq = nlh->nlmsg_seq,
1814                 .event = RTM_NEWADDR,
1815                 .flags = NLM_F_MULTI,
1816                 .netnsid = -1,
1817         };
1818         struct net *net = sock_net(skb->sk);
1819         struct net *tgt_net = net;
1820         int h, s_h;
1821         int idx, s_idx;
1822         int s_ip_idx;
1823         struct net_device *dev;
1824         struct in_device *in_dev;
1825         struct hlist_head *head;
1826         int err = 0;
1827
1828         s_h = cb->args[0];
1829         s_idx = idx = cb->args[1];
1830         s_ip_idx = cb->args[2];
1831
1832         if (cb->strict_check) {
1833                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1834                                                  skb->sk, cb);
1835                 if (err < 0)
1836                         goto put_tgt_net;
1837
1838                 err = 0;
1839                 if (fillargs.ifindex) {
1840                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1841                         if (!dev) {
1842                                 err = -ENODEV;
1843                                 goto put_tgt_net;
1844                         }
1845
1846                         in_dev = __in_dev_get_rtnl(dev);
1847                         if (in_dev) {
1848                                 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1849                                                        &fillargs);
1850                         }
1851                         goto put_tgt_net;
1852                 }
1853         }
1854
1855         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1856                 idx = 0;
1857                 head = &tgt_net->dev_index_head[h];
1858                 rcu_read_lock();
1859                 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1860                           tgt_net->dev_base_seq;
1861                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1862                         if (idx < s_idx)
1863                                 goto cont;
1864                         if (h > s_h || idx > s_idx)
1865                                 s_ip_idx = 0;
1866                         in_dev = __in_dev_get_rcu(dev);
1867                         if (!in_dev)
1868                                 goto cont;
1869
1870                         err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1871                                                &fillargs);
1872                         if (err < 0) {
1873                                 rcu_read_unlock();
1874                                 goto done;
1875                         }
1876 cont:
1877                         idx++;
1878                 }
1879                 rcu_read_unlock();
1880         }
1881
1882 done:
1883         cb->args[0] = h;
1884         cb->args[1] = idx;
1885 put_tgt_net:
1886         if (fillargs.netnsid >= 0)
1887                 put_net(tgt_net);
1888
1889         return skb->len ? : err;
1890 }
1891
1892 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1893                       u32 portid)
1894 {
1895         struct inet_fill_args fillargs = {
1896                 .portid = portid,
1897                 .seq = nlh ? nlh->nlmsg_seq : 0,
1898                 .event = event,
1899                 .flags = 0,
1900                 .netnsid = -1,
1901         };
1902         struct sk_buff *skb;
1903         int err = -ENOBUFS;
1904         struct net *net;
1905
1906         net = dev_net(ifa->ifa_dev->dev);
1907         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1908         if (!skb)
1909                 goto errout;
1910
1911         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1912         if (err < 0) {
1913                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1914                 WARN_ON(err == -EMSGSIZE);
1915                 kfree_skb(skb);
1916                 goto errout;
1917         }
1918         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1919         return;
1920 errout:
1921         if (err < 0)
1922                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1923 }
1924
1925 static size_t inet_get_link_af_size(const struct net_device *dev,
1926                                     u32 ext_filter_mask)
1927 {
1928         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1929
1930         if (!in_dev)
1931                 return 0;
1932
1933         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1934 }
1935
1936 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1937                              u32 ext_filter_mask)
1938 {
1939         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1940         struct nlattr *nla;
1941         int i;
1942
1943         if (!in_dev)
1944                 return -ENODATA;
1945
1946         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1947         if (!nla)
1948                 return -EMSGSIZE;
1949
1950         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1951                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1952
1953         return 0;
1954 }
1955
/* Attribute policy for the AF_INET link attributes: IFLA_INET_CONF is nested. */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1959
1960 static int inet_validate_link_af(const struct net_device *dev,
1961                                  const struct nlattr *nla,
1962                                  struct netlink_ext_ack *extack)
1963 {
1964         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1965         int err, rem;
1966
1967         if (dev && !__in_dev_get_rtnl(dev))
1968                 return -EAFNOSUPPORT;
1969
1970         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1971                                           inet_af_policy, extack);
1972         if (err < 0)
1973                 return err;
1974
1975         if (tb[IFLA_INET_CONF]) {
1976                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1977                         int cfgid = nla_type(a);
1978
1979                         if (nla_len(a) < 4)
1980                                 return -EINVAL;
1981
1982                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1983                                 return -EINVAL;
1984                 }
1985         }
1986
1987         return 0;
1988 }
1989
1990 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1991                             struct netlink_ext_ack *extack)
1992 {
1993         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1994         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1995         int rem;
1996
1997         if (!in_dev)
1998                 return -EAFNOSUPPORT;
1999
2000         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2001                 return -EINVAL;
2002
2003         if (tb[IFLA_INET_CONF]) {
2004                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2005                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2006         }
2007
2008         return 0;
2009 }
2010
2011 static int inet_netconf_msgsize_devconf(int type)
2012 {
2013         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2014                    + nla_total_size(4); /* NETCONFA_IFINDEX */
2015         bool all = false;
2016
2017         if (type == NETCONFA_ALL)
2018                 all = true;
2019
2020         if (all || type == NETCONFA_FORWARDING)
2021                 size += nla_total_size(4);
2022         if (all || type == NETCONFA_RP_FILTER)
2023                 size += nla_total_size(4);
2024         if (all || type == NETCONFA_MC_FORWARDING)
2025                 size += nla_total_size(4);
2026         if (all || type == NETCONFA_BC_FORWARDING)
2027                 size += nla_total_size(4);
2028         if (all || type == NETCONFA_PROXY_NEIGH)
2029                 size += nla_total_size(4);
2030         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2031                 size += nla_total_size(4);
2032
2033         return size;
2034 }
2035
2036 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2037                                      struct ipv4_devconf *devconf, u32 portid,
2038                                      u32 seq, int event, unsigned int flags,
2039                                      int type)
2040 {
2041         struct nlmsghdr  *nlh;
2042         struct netconfmsg *ncm;
2043         bool all = false;
2044
2045         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2046                         flags);
2047         if (!nlh)
2048                 return -EMSGSIZE;
2049
2050         if (type == NETCONFA_ALL)
2051                 all = true;
2052
2053         ncm = nlmsg_data(nlh);
2054         ncm->ncm_family = AF_INET;
2055
2056         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2057                 goto nla_put_failure;
2058
2059         if (!devconf)
2060                 goto out;
2061
2062         if ((all || type == NETCONFA_FORWARDING) &&
2063             nla_put_s32(skb, NETCONFA_FORWARDING,
2064                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2065                 goto nla_put_failure;
2066         if ((all || type == NETCONFA_RP_FILTER) &&
2067             nla_put_s32(skb, NETCONFA_RP_FILTER,
2068                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2069                 goto nla_put_failure;
2070         if ((all || type == NETCONFA_MC_FORWARDING) &&
2071             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2072                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2073                 goto nla_put_failure;
2074         if ((all || type == NETCONFA_BC_FORWARDING) &&
2075             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2076                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2077                 goto nla_put_failure;
2078         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2079             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2080                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2081                 goto nla_put_failure;
2082         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2083             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2084                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2085                 goto nla_put_failure;
2086
2087 out:
2088         nlmsg_end(skb, nlh);
2089         return 0;
2090
2091 nla_put_failure:
2092         nlmsg_cancel(skb, nlh);
2093         return -EMSGSIZE;
2094 }
2095
2096 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2097                                  int ifindex, struct ipv4_devconf *devconf)
2098 {
2099         struct sk_buff *skb;
2100         int err = -ENOBUFS;
2101
2102         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2103         if (!skb)
2104                 goto errout;
2105
2106         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2107                                         event, 0, type);
2108         if (err < 0) {
2109                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2110                 WARN_ON(err == -EMSGSIZE);
2111                 kfree_skb(skb);
2112                 goto errout;
2113         }
2114         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2115         return;
2116 errout:
2117         if (err < 0)
2118                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2119 }
2120
/*
 * Attribute policy used when parsing netconf requests; each listed
 * attribute has .len set to sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
        [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
        [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
};
2128
2129 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2130                                       const struct nlmsghdr *nlh,
2131                                       struct nlattr **tb,
2132                                       struct netlink_ext_ack *extack)
2133 {
2134         int i, err;
2135
2136         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2137                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2138                 return -EINVAL;
2139         }
2140
2141         if (!netlink_strict_get_check(skb))
2142                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2143                                               tb, NETCONFA_MAX,
2144                                               devconf_ipv4_policy, extack);
2145
2146         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2147                                             tb, NETCONFA_MAX,
2148                                             devconf_ipv4_policy, extack);
2149         if (err)
2150                 return err;
2151
2152         for (i = 0; i <= NETCONFA_MAX; i++) {
2153                 if (!tb[i])
2154                         continue;
2155
2156                 switch (i) {
2157                 case NETCONFA_IFINDEX:
2158                         break;
2159                 default:
2160                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2161                         return -EINVAL;
2162                 }
2163         }
2164
2165         return 0;
2166 }
2167
2168 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2169                                     struct nlmsghdr *nlh,
2170                                     struct netlink_ext_ack *extack)
2171 {
2172         struct net *net = sock_net(in_skb->sk);
2173         struct nlattr *tb[NETCONFA_MAX+1];
2174         struct sk_buff *skb;
2175         struct ipv4_devconf *devconf;
2176         struct in_device *in_dev;
2177         struct net_device *dev;
2178         int ifindex;
2179         int err;
2180
2181         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2182         if (err)
2183                 goto errout;
2184
2185         err = -EINVAL;
2186         if (!tb[NETCONFA_IFINDEX])
2187                 goto errout;
2188
2189         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2190         switch (ifindex) {
2191         case NETCONFA_IFINDEX_ALL:
2192                 devconf = net->ipv4.devconf_all;
2193                 break;
2194         case NETCONFA_IFINDEX_DEFAULT:
2195                 devconf = net->ipv4.devconf_dflt;
2196                 break;
2197         default:
2198                 dev = __dev_get_by_index(net, ifindex);
2199                 if (!dev)
2200                         goto errout;
2201                 in_dev = __in_dev_get_rtnl(dev);
2202                 if (!in_dev)
2203                         goto errout;
2204                 devconf = &in_dev->cnf;
2205                 break;
2206         }
2207
2208         err = -ENOBUFS;
2209         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2210         if (!skb)
2211                 goto errout;
2212
2213         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2214                                         NETLINK_CB(in_skb).portid,
2215                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2216                                         NETCONFA_ALL);
2217         if (err < 0) {
2218                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2219                 WARN_ON(err == -EMSGSIZE);
2220                 kfree_skb(skb);
2221                 goto errout;
2222         }
2223         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2224 errout:
2225         return err;
2226 }
2227
2228 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2229                                      struct netlink_callback *cb)
2230 {
2231         const struct nlmsghdr *nlh = cb->nlh;
2232         struct net *net = sock_net(skb->sk);
2233         int h, s_h;
2234         int idx, s_idx;
2235         struct net_device *dev;
2236         struct in_device *in_dev;
2237         struct hlist_head *head;
2238
2239         if (cb->strict_check) {
2240                 struct netlink_ext_ack *extack = cb->extack;
2241                 struct netconfmsg *ncm;
2242
2243                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2244                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2245                         return -EINVAL;
2246                 }
2247
2248                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2249                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2250                         return -EINVAL;
2251                 }
2252         }
2253
2254         s_h = cb->args[0];
2255         s_idx = idx = cb->args[1];
2256
2257         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2258                 idx = 0;
2259                 head = &net->dev_index_head[h];
2260                 rcu_read_lock();
2261                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2262                           net->dev_base_seq;
2263                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2264                         if (idx < s_idx)
2265                                 goto cont;
2266                         in_dev = __in_dev_get_rcu(dev);
2267                         if (!in_dev)
2268                                 goto cont;
2269
2270                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2271                                                       &in_dev->cnf,
2272                                                       NETLINK_CB(cb->skb).portid,
2273                                                       nlh->nlmsg_seq,
2274                                                       RTM_NEWNETCONF,
2275                                                       NLM_F_MULTI,
2276                                                       NETCONFA_ALL) < 0) {
2277                                 rcu_read_unlock();
2278                                 goto done;
2279                         }
2280                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2281 cont:
2282                         idx++;
2283                 }
2284                 rcu_read_unlock();
2285         }
2286         if (h == NETDEV_HASHENTRIES) {
2287                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2288                                               net->ipv4.devconf_all,
2289                                               NETLINK_CB(cb->skb).portid,
2290                                               nlh->nlmsg_seq,
2291                                               RTM_NEWNETCONF, NLM_F_MULTI,
2292                                               NETCONFA_ALL) < 0)
2293                         goto done;
2294                 else
2295                         h++;
2296         }
2297         if (h == NETDEV_HASHENTRIES + 1) {
2298                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2299                                               net->ipv4.devconf_dflt,
2300                                               NETLINK_CB(cb->skb).portid,
2301                                               nlh->nlmsg_seq,
2302                                               RTM_NEWNETCONF, NLM_F_MULTI,
2303                                               NETCONFA_ALL) < 0)
2304                         goto done;
2305                 else
2306                         h++;
2307         }
2308 done:
2309         cb->args[0] = h;
2310         cb->args[1] = idx;
2311
2312         return skb->len;
2313 }
2314
2315 #ifdef CONFIG_SYSCTL
2316
2317 static void devinet_copy_dflt_conf(struct net *net, int i)
2318 {
2319         struct net_device *dev;
2320
2321         rcu_read_lock();
2322         for_each_netdev_rcu(net, dev) {
2323                 struct in_device *in_dev;
2324
2325                 in_dev = __in_dev_get_rcu(dev);
2326                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2327                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2328         }
2329         rcu_read_unlock();
2330 }
2331
2332 /* called with RTNL locked */
2333 static void inet_forward_change(struct net *net)
2334 {
2335         struct net_device *dev;
2336         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2337
2338         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2339         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2340         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2341                                     NETCONFA_FORWARDING,
2342                                     NETCONFA_IFINDEX_ALL,
2343                                     net->ipv4.devconf_all);
2344         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2345                                     NETCONFA_FORWARDING,
2346                                     NETCONFA_IFINDEX_DEFAULT,
2347                                     net->ipv4.devconf_dflt);
2348
2349         for_each_netdev(net, dev) {
2350                 struct in_device *in_dev;
2351
2352                 if (on)
2353                         dev_disable_lro(dev);
2354
2355                 in_dev = __in_dev_get_rtnl(dev);
2356                 if (in_dev) {
2357                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2358                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2359                                                     NETCONFA_FORWARDING,
2360                                                     dev->ifindex, &in_dev->cnf);
2361                 }
2362         }
2363 }
2364
2365 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2366 {
2367         if (cnf == net->ipv4.devconf_dflt)
2368                 return NETCONFA_IFINDEX_DEFAULT;
2369         else if (cnf == net->ipv4.devconf_all)
2370                 return NETCONFA_IFINDEX_ALL;
2371         else {
2372                 struct in_device *idev
2373                         = container_of(cnf, struct in_device, cnf);
2374                 return idev->dev->ifindex;
2375         }
2376 }
2377
2378 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2379                              void *buffer, size_t *lenp, loff_t *ppos)
2380 {
2381         int old_value = *(int *)ctl->data;
2382         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2383         int new_value = *(int *)ctl->data;
2384
2385         if (write) {
2386                 struct ipv4_devconf *cnf = ctl->extra1;
2387                 struct net *net = ctl->extra2;
2388                 int i = (int *)ctl->data - cnf->data;
2389                 int ifindex;
2390
2391                 set_bit(i, cnf->state);
2392
2393                 if (cnf == net->ipv4.devconf_dflt)
2394                         devinet_copy_dflt_conf(net, i);
2395                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2396                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2397                         if ((new_value == 0) && (old_value != 0))
2398                                 rt_cache_flush(net);
2399
2400                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2401                     new_value != old_value)
2402                         rt_cache_flush(net);
2403
2404                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2405                     new_value != old_value) {
2406                         ifindex = devinet_conf_ifindex(net, cnf);
2407                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2408                                                     NETCONFA_RP_FILTER,
2409                                                     ifindex, cnf);
2410                 }
2411                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2412                     new_value != old_value) {
2413                         ifindex = devinet_conf_ifindex(net, cnf);
2414                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2415                                                     NETCONFA_PROXY_NEIGH,
2416                                                     ifindex, cnf);
2417                 }
2418                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2419                     new_value != old_value) {
2420                         ifindex = devinet_conf_ifindex(net, cnf);
2421                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2422                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2423                                                     ifindex, cnf);
2424                 }
2425         }
2426
2427         return ret;
2428 }
2429
2430 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2431                                   void *buffer, size_t *lenp, loff_t *ppos)
2432 {
2433         int *valp = ctl->data;
2434         int val = *valp;
2435         loff_t pos = *ppos;
2436         struct net *net = ctl->extra2;
2437         int ret;
2438
2439         if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2440                 return -EPERM;
2441
2442         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2443
2444         if (write && *valp != val) {
2445                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2446                         if (!rtnl_trylock()) {
2447                                 /* Restore the original values before restarting */
2448                                 *valp = val;
2449                                 *ppos = pos;
2450                                 return restart_syscall();
2451                         }
2452                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2453                                 inet_forward_change(net);
2454                         } else {
2455                                 struct ipv4_devconf *cnf = ctl->extra1;
2456                                 struct in_device *idev =
2457                                         container_of(cnf, struct in_device, cnf);
2458                                 if (*valp)
2459                                         dev_disable_lro(idev->dev);
2460                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2461                                                             NETCONFA_FORWARDING,
2462                                                             idev->dev->ifindex,
2463                                                             cnf);
2464                         }
2465                         rtnl_unlock();
2466                         rt_cache_flush(net);
2467                 } else
2468                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2469                                                     NETCONFA_FORWARDING,
2470                                                     NETCONFA_IFINDEX_DEFAULT,
2471                                                     net->ipv4.devconf_dflt);
2472         }
2473
2474         return ret;
2475 }
2476
2477 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2478                                 void *buffer, size_t *lenp, loff_t *ppos)
2479 {
2480         int *valp = ctl->data;
2481         int val = *valp;
2482         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2483         struct net *net = ctl->extra2;
2484
2485         if (write && *valp != val)
2486                 rt_cache_flush(net);
2487
2488         return ret;
2489 }
2490
/*
 * Build one ctl_table initializer for the devconf attribute
 * IPV4_DEVCONF_<attr>.  The entry initially points into the global
 * ipv4_devconf (slot <attr> - 1 of its data array) and records
 * &ipv4_devconf in .extra1.
 */
#define DEVINET_SYSCTL_ENTRY(attr, pname, perm, handler) \
	{ \
		.procname	= pname, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry served by devinet_conf_proc. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, pname) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, devinet_conf_proc)

/* Mode 0444 entry served by devinet_conf_proc. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, pname) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-chosen handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, handler) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, handler)

/* Mode 0644 entry served by ipv4_doint_and_flush. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, pname) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, ipv4_doint_and_flush)
2513
2514 static struct devinet_sysctl_table {
2515         struct ctl_table_header *sysctl_header;
2516         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2517 } devinet_sysctl = {
2518         .devinet_vars = {
2519                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2520                                              devinet_sysctl_forward),
2521                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2522                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2523
2524                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2525                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2526                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2527                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2528                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2529                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2530                                         "accept_source_route"),
2531                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2532                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2533                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2534                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2535                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2536                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2537                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2538                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2539                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2540                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2541                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2542                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2543                 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2544                                         "arp_evict_nocarrier"),
2545                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2546                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2547                                         "force_igmp_version"),
2548                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2549                                         "igmpv2_unsolicited_report_interval"),
2550                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2551                                         "igmpv3_unsolicited_report_interval"),
2552                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2553                                         "ignore_routes_with_linkdown"),
2554                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2555                                         "drop_gratuitous_arp"),
2556
2557                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2558                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2559                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2560                                               "promote_secondaries"),
2561                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2562                                               "route_localnet"),
2563                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2564                                               "drop_unicast_in_l2_multicast"),
2565         },
2566 };
2567
2568 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2569                                      int ifindex, struct ipv4_devconf *p)
2570 {
2571         int i;
2572         struct devinet_sysctl_table *t;
2573         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2574
2575         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2576         if (!t)
2577                 goto out;
2578
2579         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2580                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2581                 t->devinet_vars[i].extra1 = p;
2582                 t->devinet_vars[i].extra2 = net;
2583         }
2584
2585         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2586
2587         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2588         if (!t->sysctl_header)
2589                 goto free;
2590
2591         p->sysctl = t;
2592
2593         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2594                                     ifindex, p);
2595         return 0;
2596
2597 free:
2598         kfree(t);
2599 out:
2600         return -ENOMEM;
2601 }
2602
2603 static void __devinet_sysctl_unregister(struct net *net,
2604                                         struct ipv4_devconf *cnf, int ifindex)
2605 {
2606         struct devinet_sysctl_table *t = cnf->sysctl;
2607
2608         if (t) {
2609                 cnf->sysctl = NULL;
2610                 unregister_net_sysctl_table(t->sysctl_header);
2611                 kfree(t);
2612         }
2613
2614         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2615 }
2616
2617 static int devinet_sysctl_register(struct in_device *idev)
2618 {
2619         int err;
2620
2621         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2622                 return -EINVAL;
2623
2624         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2625         if (err)
2626                 return err;
2627         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2628                                         idev->dev->ifindex, &idev->cnf);
2629         if (err)
2630                 neigh_sysctl_unregister(idev->arp_parms);
2631         return err;
2632 }
2633
2634 static void devinet_sysctl_unregister(struct in_device *idev)
2635 {
2636         struct net *net = dev_net(idev->dev);
2637
2638         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2639         neigh_sysctl_unregister(idev->arp_parms);
2640 }
2641
2642 static struct ctl_table ctl_forward_entry[] = {
2643         {
2644                 .procname       = "ip_forward",
2645                 .data           = &ipv4_devconf.data[
2646                                         IPV4_DEVCONF_FORWARDING - 1],
2647                 .maxlen         = sizeof(int),
2648                 .mode           = 0644,
2649                 .proc_handler   = devinet_sysctl_forward,
2650                 .extra1         = &ipv4_devconf,
2651                 .extra2         = &init_net,
2652         },
2653         { },
2654 };
2655 #endif
2656
2657 static __net_init int devinet_init_net(struct net *net)
2658 {
2659         int err;
2660         struct ipv4_devconf *all, *dflt;
2661 #ifdef CONFIG_SYSCTL
2662         struct ctl_table *tbl;
2663         struct ctl_table_header *forw_hdr;
2664 #endif
2665
2666         err = -ENOMEM;
2667         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2668         if (!all)
2669                 goto err_alloc_all;
2670
2671         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2672         if (!dflt)
2673                 goto err_alloc_dflt;
2674
2675 #ifdef CONFIG_SYSCTL
2676         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2677         if (!tbl)
2678                 goto err_alloc_ctl;
2679
2680         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2681         tbl[0].extra1 = all;
2682         tbl[0].extra2 = net;
2683 #endif
2684
2685         if (!net_eq(net, &init_net)) {
2686                 switch (net_inherit_devconf()) {
2687                 case 3:
2688                         /* copy from the current netns */
2689                         memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2690                                sizeof(ipv4_devconf));
2691                         memcpy(dflt,
2692                                current->nsproxy->net_ns->ipv4.devconf_dflt,
2693                                sizeof(ipv4_devconf_dflt));
2694                         break;
2695                 case 0:
2696                 case 1:
2697                         /* copy from init_net */
2698                         memcpy(all, init_net.ipv4.devconf_all,
2699                                sizeof(ipv4_devconf));
2700                         memcpy(dflt, init_net.ipv4.devconf_dflt,
2701                                sizeof(ipv4_devconf_dflt));
2702                         break;
2703                 case 2:
2704                         /* use compiled values */
2705                         break;
2706                 }
2707         }
2708
2709 #ifdef CONFIG_SYSCTL
2710         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2711         if (err < 0)
2712                 goto err_reg_all;
2713
2714         err = __devinet_sysctl_register(net, "default",
2715                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2716         if (err < 0)
2717                 goto err_reg_dflt;
2718
2719         err = -ENOMEM;
2720         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2721         if (!forw_hdr)
2722                 goto err_reg_ctl;
2723         net->ipv4.forw_hdr = forw_hdr;
2724 #endif
2725
2726         net->ipv4.devconf_all = all;
2727         net->ipv4.devconf_dflt = dflt;
2728         return 0;
2729
2730 #ifdef CONFIG_SYSCTL
2731 err_reg_ctl:
2732         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2733 err_reg_dflt:
2734         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2735 err_reg_all:
2736         kfree(tbl);
2737 err_alloc_ctl:
2738 #endif
2739         kfree(dflt);
2740 err_alloc_dflt:
2741         kfree(all);
2742 err_alloc_all:
2743         return err;
2744 }
2745
2746 static __net_exit void devinet_exit_net(struct net *net)
2747 {
2748 #ifdef CONFIG_SYSCTL
2749         struct ctl_table *tbl;
2750
2751         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2752         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2753         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2754                                     NETCONFA_IFINDEX_DEFAULT);
2755         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2756                                     NETCONFA_IFINDEX_ALL);
2757         kfree(tbl);
2758 #endif
2759         kfree(net->ipv4.devconf_dflt);
2760         kfree(net->ipv4.devconf_all);
2761 }
2762
2763 static __net_initdata struct pernet_operations devinet_ops = {
2764         .init = devinet_init_net,
2765         .exit = devinet_exit_net,
2766 };
2767
2768 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2769         .family           = AF_INET,
2770         .fill_link_af     = inet_fill_link_af,
2771         .get_link_af_size = inet_get_link_af_size,
2772         .validate_link_af = inet_validate_link_af,
2773         .set_link_af      = inet_set_link_af,
2774 };
2775
2776 void __init devinet_init(void)
2777 {
2778         int i;
2779
2780         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2781                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2782
2783         register_pernet_subsys(&devinet_ops);
2784         register_netdevice_notifier(&ip_netdev_notifier);
2785
2786         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2787
2788         rtnl_af_register(&inet_af_ops);
2789
2790         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2791         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2792         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2793         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2794                       inet_netconf_dump_devconf, 0);
2795 }