Merge remote-tracking branch 'stable/linux-4.19.y' into rpi-4.19.y
[platform/kernel/linux-rpi.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
/*
 * Address flags that only make sense for IPv6.  They are masked off
 * every IPv4 address before it is inserted (see __inet_insert_ifa()).
 */
#define IPV6ONLY_FLAGS						\
	(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED |	\
	 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE |			\
	 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
73
74 static struct ipv4_devconf ipv4_devconf = {
75         .data = {
76                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
77                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
78                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
80                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
81                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
82         },
83 };
84
85 static struct ipv4_devconf ipv4_devconf_dflt = {
86         .data = {
87                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
88                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
89                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
90                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
91                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
92                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
93                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
94         },
95 };
96
/*
 * Access attribute @attr of the per-namespace default devconf
 * (net->ipv4.devconf_dflt).  @net is parenthesized so that any pointer
 * expression, not just a plain identifier, expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
99
100 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
101         [IFA_LOCAL]             = { .type = NLA_U32 },
102         [IFA_ADDRESS]           = { .type = NLA_U32 },
103         [IFA_BROADCAST]         = { .type = NLA_U32 },
104         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
105         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
106         [IFA_FLAGS]             = { .type = NLA_U32 },
107         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
108 };
109
110 #define IN4_ADDR_HSIZE_SHIFT    8
111 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
112
113 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
114
115 static u32 inet_addr_hash(const struct net *net, __be32 addr)
116 {
117         u32 val = (__force u32) addr ^ net_hash_mix(net);
118
119         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
120 }
121
122 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
123 {
124         u32 hash = inet_addr_hash(net, ifa->ifa_local);
125
126         ASSERT_RTNL();
127         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
128 }
129
130 static void inet_hash_remove(struct in_ifaddr *ifa)
131 {
132         ASSERT_RTNL();
133         hlist_del_init_rcu(&ifa->hash);
134 }
135
136 /**
137  * __ip_dev_find - find the first device with a given source address.
138  * @net: the net namespace
139  * @addr: the source address
140  * @devref: if true, take a reference on the found device
141  *
142  * If a caller uses devref=false, it should be protected by RCU, or RTNL
143  */
144 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
145 {
146         struct net_device *result = NULL;
147         struct in_ifaddr *ifa;
148
149         rcu_read_lock();
150         ifa = inet_lookup_ifaddr_rcu(net, addr);
151         if (!ifa) {
152                 struct flowi4 fl4 = { .daddr = addr };
153                 struct fib_result res = { 0 };
154                 struct fib_table *local;
155
156                 /* Fallback to FIB local table so that communication
157                  * over loopback subnets work.
158                  */
159                 local = fib_get_table(net, RT_TABLE_LOCAL);
160                 if (local &&
161                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
162                     res.type == RTN_LOCAL)
163                         result = FIB_RES_DEV(res);
164         } else {
165                 result = ifa->ifa_dev->dev;
166         }
167         if (result && devref)
168                 dev_hold(result);
169         rcu_read_unlock();
170         return result;
171 }
172 EXPORT_SYMBOL(__ip_dev_find);
173
174 /* called under RCU lock */
175 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
176 {
177         u32 hash = inet_addr_hash(net, addr);
178         struct in_ifaddr *ifa;
179
180         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
181                 if (ifa->ifa_local == addr &&
182                     net_eq(dev_net(ifa->ifa_dev->dev), net))
183                         return ifa;
184
185         return NULL;
186 }
187
188 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
189
190 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
191 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
192 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
193                          int destroy);
194 #ifdef CONFIG_SYSCTL
195 static int devinet_sysctl_register(struct in_device *idev);
196 static void devinet_sysctl_unregister(struct in_device *idev);
197 #else
198 static int devinet_sysctl_register(struct in_device *idev)
199 {
200         return 0;
201 }
202 static void devinet_sysctl_unregister(struct in_device *idev)
203 {
204 }
205 #endif
206
207 /* Locks all the inet devices. */
208
209 static struct in_ifaddr *inet_alloc_ifa(void)
210 {
211         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
212 }
213
214 static void inet_rcu_free_ifa(struct rcu_head *head)
215 {
216         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
217         if (ifa->ifa_dev)
218                 in_dev_put(ifa->ifa_dev);
219         kfree(ifa);
220 }
221
222 static void inet_free_ifa(struct in_ifaddr *ifa)
223 {
224         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
225 }
226
227 void in_dev_finish_destroy(struct in_device *idev)
228 {
229         struct net_device *dev = idev->dev;
230
231         WARN_ON(idev->ifa_list);
232         WARN_ON(idev->mc_list);
233         kfree(rcu_dereference_protected(idev->mc_hash, 1));
234 #ifdef NET_REFCNT_DEBUG
235         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
236 #endif
237         dev_put(dev);
238         if (!idev->dead)
239                 pr_err("Freeing alive in_device %p\n", idev);
240         else
241                 kfree(idev);
242 }
243 EXPORT_SYMBOL(in_dev_finish_destroy);
244
245 static struct in_device *inetdev_init(struct net_device *dev)
246 {
247         struct in_device *in_dev;
248         int err = -ENOMEM;
249
250         ASSERT_RTNL();
251
252         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
253         if (!in_dev)
254                 goto out;
255         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
256                         sizeof(in_dev->cnf));
257         in_dev->cnf.sysctl = NULL;
258         in_dev->dev = dev;
259         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
260         if (!in_dev->arp_parms)
261                 goto out_kfree;
262         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
263                 dev_disable_lro(dev);
264         /* Reference in_dev->dev */
265         dev_hold(dev);
266         /* Account for reference dev->ip_ptr (below) */
267         refcount_set(&in_dev->refcnt, 1);
268
269         err = devinet_sysctl_register(in_dev);
270         if (err) {
271                 in_dev->dead = 1;
272                 in_dev_put(in_dev);
273                 in_dev = NULL;
274                 goto out;
275         }
276         ip_mc_init_dev(in_dev);
277         if (dev->flags & IFF_UP)
278                 ip_mc_up(in_dev);
279
280         /* we can receive as soon as ip_ptr is set -- do this last */
281         rcu_assign_pointer(dev->ip_ptr, in_dev);
282 out:
283         return in_dev ?: ERR_PTR(err);
284 out_kfree:
285         kfree(in_dev);
286         in_dev = NULL;
287         goto out;
288 }
289
290 static void in_dev_rcu_put(struct rcu_head *head)
291 {
292         struct in_device *idev = container_of(head, struct in_device, rcu_head);
293         in_dev_put(idev);
294 }
295
296 static void inetdev_destroy(struct in_device *in_dev)
297 {
298         struct in_ifaddr *ifa;
299         struct net_device *dev;
300
301         ASSERT_RTNL();
302
303         dev = in_dev->dev;
304
305         in_dev->dead = 1;
306
307         ip_mc_destroy_dev(in_dev);
308
309         while ((ifa = in_dev->ifa_list) != NULL) {
310                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
311                 inet_free_ifa(ifa);
312         }
313
314         RCU_INIT_POINTER(dev->ip_ptr, NULL);
315
316         devinet_sysctl_unregister(in_dev);
317         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
318         arp_ifdown(dev);
319
320         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
321 }
322
323 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
324 {
325         rcu_read_lock();
326         for_primary_ifa(in_dev) {
327                 if (inet_ifa_match(a, ifa)) {
328                         if (!b || inet_ifa_match(b, ifa)) {
329                                 rcu_read_unlock();
330                                 return 1;
331                         }
332                 }
333         } endfor_ifa(in_dev);
334         rcu_read_unlock();
335         return 0;
336 }
337
338 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
339                          int destroy, struct nlmsghdr *nlh, u32 portid)
340 {
341         struct in_ifaddr *promote = NULL;
342         struct in_ifaddr *ifa, *ifa1 = *ifap;
343         struct in_ifaddr *last_prim = in_dev->ifa_list;
344         struct in_ifaddr *prev_prom = NULL;
345         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
346
347         ASSERT_RTNL();
348
349         if (in_dev->dead)
350                 goto no_promotions;
351
352         /* 1. Deleting primary ifaddr forces deletion all secondaries
353          * unless alias promotion is set
354          **/
355
356         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
357                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
358
359                 while ((ifa = *ifap1) != NULL) {
360                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
361                             ifa1->ifa_scope <= ifa->ifa_scope)
362                                 last_prim = ifa;
363
364                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
365                             ifa1->ifa_mask != ifa->ifa_mask ||
366                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
367                                 ifap1 = &ifa->ifa_next;
368                                 prev_prom = ifa;
369                                 continue;
370                         }
371
372                         if (!do_promote) {
373                                 inet_hash_remove(ifa);
374                                 *ifap1 = ifa->ifa_next;
375
376                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
377                                 blocking_notifier_call_chain(&inetaddr_chain,
378                                                 NETDEV_DOWN, ifa);
379                                 inet_free_ifa(ifa);
380                         } else {
381                                 promote = ifa;
382                                 break;
383                         }
384                 }
385         }
386
387         /* On promotion all secondaries from subnet are changing
388          * the primary IP, we must remove all their routes silently
389          * and later to add them back with new prefsrc. Do this
390          * while all addresses are on the device list.
391          */
392         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
393                 if (ifa1->ifa_mask == ifa->ifa_mask &&
394                     inet_ifa_match(ifa1->ifa_address, ifa))
395                         fib_del_ifaddr(ifa, ifa1);
396         }
397
398 no_promotions:
399         /* 2. Unlink it */
400
401         *ifap = ifa1->ifa_next;
402         inet_hash_remove(ifa1);
403
404         /* 3. Announce address deletion */
405
406         /* Send message first, then call notifier.
407            At first sight, FIB update triggered by notifier
408            will refer to already deleted ifaddr, that could confuse
409            netlink listeners. It is not true: look, gated sees
410            that route deleted and if it still thinks that ifaddr
411            is valid, it will try to restore deleted routes... Grr.
412            So that, this order is correct.
413          */
414         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
415         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
416
417         if (promote) {
418                 struct in_ifaddr *next_sec = promote->ifa_next;
419
420                 if (prev_prom) {
421                         prev_prom->ifa_next = promote->ifa_next;
422                         promote->ifa_next = last_prim->ifa_next;
423                         last_prim->ifa_next = promote;
424                 }
425
426                 promote->ifa_flags &= ~IFA_F_SECONDARY;
427                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
428                 blocking_notifier_call_chain(&inetaddr_chain,
429                                 NETDEV_UP, promote);
430                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
431                         if (ifa1->ifa_mask != ifa->ifa_mask ||
432                             !inet_ifa_match(ifa1->ifa_address, ifa))
433                                         continue;
434                         fib_add_ifaddr(ifa);
435                 }
436
437         }
438         if (destroy)
439                 inet_free_ifa(ifa1);
440 }
441
442 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
443                          int destroy)
444 {
445         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
446 }
447
/* Delayed work item that runs check_lifetime() (defined below). */
static void check_lifetime(struct work_struct *work);

static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
451
452 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
453                              u32 portid, struct netlink_ext_ack *extack)
454 {
455         struct in_device *in_dev = ifa->ifa_dev;
456         struct in_ifaddr *ifa1, **ifap, **last_primary;
457         struct in_validator_info ivi;
458         int ret;
459
460         ASSERT_RTNL();
461
462         if (!ifa->ifa_local) {
463                 inet_free_ifa(ifa);
464                 return 0;
465         }
466
467         ifa->ifa_flags &= ~IFA_F_SECONDARY;
468         last_primary = &in_dev->ifa_list;
469
470         /* Don't set IPv6 only flags to IPv4 addresses */
471         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
472
473         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
474              ifap = &ifa1->ifa_next) {
475                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
476                     ifa->ifa_scope <= ifa1->ifa_scope)
477                         last_primary = &ifa1->ifa_next;
478                 if (ifa1->ifa_mask == ifa->ifa_mask &&
479                     inet_ifa_match(ifa1->ifa_address, ifa)) {
480                         if (ifa1->ifa_local == ifa->ifa_local) {
481                                 inet_free_ifa(ifa);
482                                 return -EEXIST;
483                         }
484                         if (ifa1->ifa_scope != ifa->ifa_scope) {
485                                 inet_free_ifa(ifa);
486                                 return -EINVAL;
487                         }
488                         ifa->ifa_flags |= IFA_F_SECONDARY;
489                 }
490         }
491
492         /* Allow any devices that wish to register ifaddr validtors to weigh
493          * in now, before changes are committed.  The rntl lock is serializing
494          * access here, so the state should not change between a validator call
495          * and a final notify on commit.  This isn't invoked on promotion under
496          * the assumption that validators are checking the address itself, and
497          * not the flags.
498          */
499         ivi.ivi_addr = ifa->ifa_address;
500         ivi.ivi_dev = ifa->ifa_dev;
501         ivi.extack = extack;
502         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
503                                            NETDEV_UP, &ivi);
504         ret = notifier_to_errno(ret);
505         if (ret) {
506                 inet_free_ifa(ifa);
507                 return ret;
508         }
509
510         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
511                 prandom_seed((__force u32) ifa->ifa_local);
512                 ifap = last_primary;
513         }
514
515         ifa->ifa_next = *ifap;
516         *ifap = ifa;
517
518         inet_hash_insert(dev_net(in_dev->dev), ifa);
519
520         cancel_delayed_work(&check_lifetime_work);
521         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
522
523         /* Send message first, then call notifier.
524            Notifier will trigger FIB update, so that
525            listeners of netlink will know about new ifaddr */
526         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
527         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
528
529         return 0;
530 }
531
532 static int inet_insert_ifa(struct in_ifaddr *ifa)
533 {
534         return __inet_insert_ifa(ifa, NULL, 0, NULL);
535 }
536
537 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
538 {
539         struct in_device *in_dev = __in_dev_get_rtnl(dev);
540
541         ASSERT_RTNL();
542
543         if (!in_dev) {
544                 inet_free_ifa(ifa);
545                 return -ENOBUFS;
546         }
547         ipv4_devconf_setall(in_dev);
548         neigh_parms_data_state_setall(in_dev->arp_parms);
549         if (ifa->ifa_dev != in_dev) {
550                 WARN_ON(ifa->ifa_dev);
551                 in_dev_hold(in_dev);
552                 ifa->ifa_dev = in_dev;
553         }
554         if (ipv4_is_loopback(ifa->ifa_local))
555                 ifa->ifa_scope = RT_SCOPE_HOST;
556         return inet_insert_ifa(ifa);
557 }
558
559 /* Caller must hold RCU or RTNL :
560  * We dont take a reference on found in_device
561  */
562 struct in_device *inetdev_by_index(struct net *net, int ifindex)
563 {
564         struct net_device *dev;
565         struct in_device *in_dev = NULL;
566
567         rcu_read_lock();
568         dev = dev_get_by_index_rcu(net, ifindex);
569         if (dev)
570                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
571         rcu_read_unlock();
572         return in_dev;
573 }
574 EXPORT_SYMBOL(inetdev_by_index);
575
576 /* Called only from RTNL semaphored context. No locks. */
577
578 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
579                                     __be32 mask)
580 {
581         ASSERT_RTNL();
582
583         for_primary_ifa(in_dev) {
584                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
585                         return ifa;
586         } endfor_ifa(in_dev);
587         return NULL;
588 }
589
590 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
591 {
592         struct ip_mreqn mreq = {
593                 .imr_multiaddr.s_addr = ifa->ifa_address,
594                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
595         };
596         int ret;
597
598         ASSERT_RTNL();
599
600         lock_sock(sk);
601         if (join)
602                 ret = ip_mc_join_group(sk, &mreq);
603         else
604                 ret = ip_mc_leave_group(sk, &mreq);
605         release_sock(sk);
606
607         return ret;
608 }
609
610 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
611                             struct netlink_ext_ack *extack)
612 {
613         struct net *net = sock_net(skb->sk);
614         struct nlattr *tb[IFA_MAX+1];
615         struct in_device *in_dev;
616         struct ifaddrmsg *ifm;
617         struct in_ifaddr *ifa, **ifap;
618         int err = -EINVAL;
619
620         ASSERT_RTNL();
621
622         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
623                           extack);
624         if (err < 0)
625                 goto errout;
626
627         ifm = nlmsg_data(nlh);
628         in_dev = inetdev_by_index(net, ifm->ifa_index);
629         if (!in_dev) {
630                 err = -ENODEV;
631                 goto errout;
632         }
633
634         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
635              ifap = &ifa->ifa_next) {
636                 if (tb[IFA_LOCAL] &&
637                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
638                         continue;
639
640                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
641                         continue;
642
643                 if (tb[IFA_ADDRESS] &&
644                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
645                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
646                         continue;
647
648                 if (ipv4_is_multicast(ifa->ifa_address))
649                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
650                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
651                 return 0;
652         }
653
654         err = -EADDRNOTAVAIL;
655 errout:
656         return err;
657 }
658
/* Lifetime value (all ones in 32 bits) that stands for "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
660
661 static void check_lifetime(struct work_struct *work)
662 {
663         unsigned long now, next, next_sec, next_sched;
664         struct in_ifaddr *ifa;
665         struct hlist_node *n;
666         int i;
667
668         now = jiffies;
669         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
670
671         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
672                 bool change_needed = false;
673
674                 rcu_read_lock();
675                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
676                         unsigned long age;
677
678                         if (ifa->ifa_flags & IFA_F_PERMANENT)
679                                 continue;
680
681                         /* We try to batch several events at once. */
682                         age = (now - ifa->ifa_tstamp +
683                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
684
685                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
686                             age >= ifa->ifa_valid_lft) {
687                                 change_needed = true;
688                         } else if (ifa->ifa_preferred_lft ==
689                                    INFINITY_LIFE_TIME) {
690                                 continue;
691                         } else if (age >= ifa->ifa_preferred_lft) {
692                                 if (time_before(ifa->ifa_tstamp +
693                                                 ifa->ifa_valid_lft * HZ, next))
694                                         next = ifa->ifa_tstamp +
695                                                ifa->ifa_valid_lft * HZ;
696
697                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
698                                         change_needed = true;
699                         } else if (time_before(ifa->ifa_tstamp +
700                                                ifa->ifa_preferred_lft * HZ,
701                                                next)) {
702                                 next = ifa->ifa_tstamp +
703                                        ifa->ifa_preferred_lft * HZ;
704                         }
705                 }
706                 rcu_read_unlock();
707                 if (!change_needed)
708                         continue;
709                 rtnl_lock();
710                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
711                         unsigned long age;
712
713                         if (ifa->ifa_flags & IFA_F_PERMANENT)
714                                 continue;
715
716                         /* We try to batch several events at once. */
717                         age = (now - ifa->ifa_tstamp +
718                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
719
720                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
721                             age >= ifa->ifa_valid_lft) {
722                                 struct in_ifaddr **ifap;
723
724                                 for (ifap = &ifa->ifa_dev->ifa_list;
725                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
726                                         if (*ifap == ifa) {
727                                                 inet_del_ifa(ifa->ifa_dev,
728                                                              ifap, 1);
729                                                 break;
730                                         }
731                                 }
732                         } else if (ifa->ifa_preferred_lft !=
733                                    INFINITY_LIFE_TIME &&
734                                    age >= ifa->ifa_preferred_lft &&
735                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
736                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
737                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
738                         }
739                 }
740                 rtnl_unlock();
741         }
742
743         next_sec = round_jiffies_up(next);
744         next_sched = next;
745
746         /* If rounded timeout is accurate enough, accept it. */
747         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
748                 next_sched = next_sec;
749
750         now = jiffies;
751         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
752         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
753                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
754
755         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
756                         next_sched - now);
757 }
758
759 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
760                              __u32 prefered_lft)
761 {
762         unsigned long timeout;
763
764         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
765
766         timeout = addrconf_timeout_fixup(valid_lft, HZ);
767         if (addrconf_finite_timeout(timeout))
768                 ifa->ifa_valid_lft = timeout;
769         else
770                 ifa->ifa_flags |= IFA_F_PERMANENT;
771
772         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
773         if (addrconf_finite_timeout(timeout)) {
774                 if (timeout == 0)
775                         ifa->ifa_flags |= IFA_F_DEPRECATED;
776                 ifa->ifa_preferred_lft = timeout;
777         }
778         ifa->ifa_tstamp = jiffies;
779         if (!ifa->ifa_cstamp)
780                 ifa->ifa_cstamp = ifa->ifa_tstamp;
781 }
782
783 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
784                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
785 {
786         struct nlattr *tb[IFA_MAX+1];
787         struct in_ifaddr *ifa;
788         struct ifaddrmsg *ifm;
789         struct net_device *dev;
790         struct in_device *in_dev;
791         int err;
792
793         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
794                           NULL);
795         if (err < 0)
796                 goto errout;
797
798         ifm = nlmsg_data(nlh);
799         err = -EINVAL;
800         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
801                 goto errout;
802
803         dev = __dev_get_by_index(net, ifm->ifa_index);
804         err = -ENODEV;
805         if (!dev)
806                 goto errout;
807
808         in_dev = __in_dev_get_rtnl(dev);
809         err = -ENOBUFS;
810         if (!in_dev)
811                 goto errout;
812
813         ifa = inet_alloc_ifa();
814         if (!ifa)
815                 /*
816                  * A potential indev allocation can be left alive, it stays
817                  * assigned to its device and is destroy with it.
818                  */
819                 goto errout;
820
821         ipv4_devconf_setall(in_dev);
822         neigh_parms_data_state_setall(in_dev->arp_parms);
823         in_dev_hold(in_dev);
824
825         if (!tb[IFA_ADDRESS])
826                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
827
828         INIT_HLIST_NODE(&ifa->hash);
829         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
830         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
831         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
832                                          ifm->ifa_flags;
833         ifa->ifa_scope = ifm->ifa_scope;
834         ifa->ifa_dev = in_dev;
835
836         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
837         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
838
839         if (tb[IFA_BROADCAST])
840                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
841
842         if (tb[IFA_LABEL])
843                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
844         else
845                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
846
847         if (tb[IFA_RT_PRIORITY])
848                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
849
850         if (tb[IFA_CACHEINFO]) {
851                 struct ifa_cacheinfo *ci;
852
853                 ci = nla_data(tb[IFA_CACHEINFO]);
854                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
855                         err = -EINVAL;
856                         goto errout_free;
857                 }
858                 *pvalid_lft = ci->ifa_valid;
859                 *pprefered_lft = ci->ifa_prefered;
860         }
861
862         return ifa;
863
864 errout_free:
865         inet_free_ifa(ifa);
866 errout:
867         return ERR_PTR(err);
868 }
869
870 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
871 {
872         struct in_device *in_dev = ifa->ifa_dev;
873         struct in_ifaddr *ifa1, **ifap;
874
875         if (!ifa->ifa_local)
876                 return NULL;
877
878         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
879              ifap = &ifa1->ifa_next) {
880                 if (ifa1->ifa_mask == ifa->ifa_mask &&
881                     inet_ifa_match(ifa1->ifa_address, ifa) &&
882                     ifa1->ifa_local == ifa->ifa_local)
883                         return ifa1;
884         }
885         return NULL;
886 }
887
888 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
889                             struct netlink_ext_ack *extack)
890 {
891         struct net *net = sock_net(skb->sk);
892         struct in_ifaddr *ifa;
893         struct in_ifaddr *ifa_existing;
894         __u32 valid_lft = INFINITY_LIFE_TIME;
895         __u32 prefered_lft = INFINITY_LIFE_TIME;
896
897         ASSERT_RTNL();
898
899         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
900         if (IS_ERR(ifa))
901                 return PTR_ERR(ifa);
902
903         ifa_existing = find_matching_ifa(ifa);
904         if (!ifa_existing) {
905                 /* It would be best to check for !NLM_F_CREATE here but
906                  * userspace already relies on not having to provide this.
907                  */
908                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
909                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
910                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
911                                                true, ifa);
912
913                         if (ret < 0) {
914                                 inet_free_ifa(ifa);
915                                 return ret;
916                         }
917                 }
918                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
919                                          extack);
920         } else {
921                 u32 new_metric = ifa->ifa_rt_priority;
922
923                 inet_free_ifa(ifa);
924
925                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
926                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
927                         return -EEXIST;
928                 ifa = ifa_existing;
929
930                 if (ifa->ifa_rt_priority != new_metric) {
931                         fib_modify_prefix_metric(ifa, new_metric);
932                         ifa->ifa_rt_priority = new_metric;
933                 }
934
935                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
936                 cancel_delayed_work(&check_lifetime_work);
937                 queue_delayed_work(system_power_efficient_wq,
938                                 &check_lifetime_work, 0);
939                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
940         }
941         return 0;
942 }
943
944 /*
945  *      Determine a default network mask, based on the IP address.
946  */
947
948 static int inet_abc_len(__be32 addr)
949 {
950         int rc = -1;    /* Something else, probably a multicast. */
951
952         if (ipv4_is_zeronet(addr))
953                 rc = 0;
954         else {
955                 __u32 haddr = ntohl(addr);
956
957                 if (IN_CLASSA(haddr))
958                         rc = 8;
959                 else if (IN_CLASSB(haddr))
960                         rc = 16;
961                 else if (IN_CLASSC(haddr))
962                         rc = 24;
963         }
964
965         return rc;
966 }
967
968
969 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
970 {
971         struct sockaddr_in sin_orig;
972         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
973         struct in_device *in_dev;
974         struct in_ifaddr **ifap = NULL;
975         struct in_ifaddr *ifa = NULL;
976         struct net_device *dev;
977         char *colon;
978         int ret = -EFAULT;
979         int tryaddrmatch = 0;
980
981         ifr->ifr_name[IFNAMSIZ - 1] = 0;
982
983         /* save original address for comparison */
984         memcpy(&sin_orig, sin, sizeof(*sin));
985
986         colon = strchr(ifr->ifr_name, ':');
987         if (colon)
988                 *colon = 0;
989
990         dev_load(net, ifr->ifr_name);
991
992         switch (cmd) {
993         case SIOCGIFADDR:       /* Get interface address */
994         case SIOCGIFBRDADDR:    /* Get the broadcast address */
995         case SIOCGIFDSTADDR:    /* Get the destination address */
996         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
997                 /* Note that these ioctls will not sleep,
998                    so that we do not impose a lock.
999                    One day we will be forced to put shlock here (I mean SMP)
1000                  */
1001                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1002                 memset(sin, 0, sizeof(*sin));
1003                 sin->sin_family = AF_INET;
1004                 break;
1005
1006         case SIOCSIFFLAGS:
1007                 ret = -EPERM;
1008                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1009                         goto out;
1010                 break;
1011         case SIOCSIFADDR:       /* Set interface address (and family) */
1012         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1013         case SIOCSIFDSTADDR:    /* Set the destination address */
1014         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1015                 ret = -EPERM;
1016                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1017                         goto out;
1018                 ret = -EINVAL;
1019                 if (sin->sin_family != AF_INET)
1020                         goto out;
1021                 break;
1022         default:
1023                 ret = -EINVAL;
1024                 goto out;
1025         }
1026
1027         rtnl_lock();
1028
1029         ret = -ENODEV;
1030         dev = __dev_get_by_name(net, ifr->ifr_name);
1031         if (!dev)
1032                 goto done;
1033
1034         if (colon)
1035                 *colon = ':';
1036
1037         in_dev = __in_dev_get_rtnl(dev);
1038         if (in_dev) {
1039                 if (tryaddrmatch) {
1040                         /* Matthias Andree */
1041                         /* compare label and address (4.4BSD style) */
1042                         /* note: we only do this for a limited set of ioctls
1043                            and only if the original address family was AF_INET.
1044                            This is checked above. */
1045                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1046                              ifap = &ifa->ifa_next) {
1047                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1048                                     sin_orig.sin_addr.s_addr ==
1049                                                         ifa->ifa_local) {
1050                                         break; /* found */
1051                                 }
1052                         }
1053                 }
1054                 /* we didn't get a match, maybe the application is
1055                    4.3BSD-style and passed in junk so we fall back to
1056                    comparing just the label */
1057                 if (!ifa) {
1058                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1059                              ifap = &ifa->ifa_next)
1060                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1061                                         break;
1062                 }
1063         }
1064
1065         ret = -EADDRNOTAVAIL;
1066         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1067                 goto done;
1068
1069         switch (cmd) {
1070         case SIOCGIFADDR:       /* Get interface address */
1071                 ret = 0;
1072                 sin->sin_addr.s_addr = ifa->ifa_local;
1073                 break;
1074
1075         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1076                 ret = 0;
1077                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1078                 break;
1079
1080         case SIOCGIFDSTADDR:    /* Get the destination address */
1081                 ret = 0;
1082                 sin->sin_addr.s_addr = ifa->ifa_address;
1083                 break;
1084
1085         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1086                 ret = 0;
1087                 sin->sin_addr.s_addr = ifa->ifa_mask;
1088                 break;
1089
1090         case SIOCSIFFLAGS:
1091                 if (colon) {
1092                         ret = -EADDRNOTAVAIL;
1093                         if (!ifa)
1094                                 break;
1095                         ret = 0;
1096                         if (!(ifr->ifr_flags & IFF_UP))
1097                                 inet_del_ifa(in_dev, ifap, 1);
1098                         break;
1099                 }
1100                 ret = dev_change_flags(dev, ifr->ifr_flags);
1101                 break;
1102
1103         case SIOCSIFADDR:       /* Set interface address (and family) */
1104                 ret = -EINVAL;
1105                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1106                         break;
1107
1108                 if (!ifa) {
1109                         ret = -ENOBUFS;
1110                         ifa = inet_alloc_ifa();
1111                         if (!ifa)
1112                                 break;
1113                         INIT_HLIST_NODE(&ifa->hash);
1114                         if (colon)
1115                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1116                         else
1117                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1118                 } else {
1119                         ret = 0;
1120                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1121                                 break;
1122                         inet_del_ifa(in_dev, ifap, 0);
1123                         ifa->ifa_broadcast = 0;
1124                         ifa->ifa_scope = 0;
1125                 }
1126
1127                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1128
1129                 if (!(dev->flags & IFF_POINTOPOINT)) {
1130                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1131                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1132                         if ((dev->flags & IFF_BROADCAST) &&
1133                             ifa->ifa_prefixlen < 31)
1134                                 ifa->ifa_broadcast = ifa->ifa_address |
1135                                                      ~ifa->ifa_mask;
1136                 } else {
1137                         ifa->ifa_prefixlen = 32;
1138                         ifa->ifa_mask = inet_make_mask(32);
1139                 }
1140                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1141                 ret = inet_set_ifa(dev, ifa);
1142                 break;
1143
1144         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1145                 ret = 0;
1146                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1147                         inet_del_ifa(in_dev, ifap, 0);
1148                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1149                         inet_insert_ifa(ifa);
1150                 }
1151                 break;
1152
1153         case SIOCSIFDSTADDR:    /* Set the destination address */
1154                 ret = 0;
1155                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1156                         break;
1157                 ret = -EINVAL;
1158                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1159                         break;
1160                 ret = 0;
1161                 inet_del_ifa(in_dev, ifap, 0);
1162                 ifa->ifa_address = sin->sin_addr.s_addr;
1163                 inet_insert_ifa(ifa);
1164                 break;
1165
1166         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1167
1168                 /*
1169                  *      The mask we set must be legal.
1170                  */
1171                 ret = -EINVAL;
1172                 if (bad_mask(sin->sin_addr.s_addr, 0))
1173                         break;
1174                 ret = 0;
1175                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1176                         __be32 old_mask = ifa->ifa_mask;
1177                         inet_del_ifa(in_dev, ifap, 0);
1178                         ifa->ifa_mask = sin->sin_addr.s_addr;
1179                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1180
1181                         /* See if current broadcast address matches
1182                          * with current netmask, then recalculate
1183                          * the broadcast address. Otherwise it's a
1184                          * funny address, so don't touch it since
1185                          * the user seems to know what (s)he's doing...
1186                          */
1187                         if ((dev->flags & IFF_BROADCAST) &&
1188                             (ifa->ifa_prefixlen < 31) &&
1189                             (ifa->ifa_broadcast ==
1190                              (ifa->ifa_local|~old_mask))) {
1191                                 ifa->ifa_broadcast = (ifa->ifa_local |
1192                                                       ~sin->sin_addr.s_addr);
1193                         }
1194                         inet_insert_ifa(ifa);
1195                 }
1196                 break;
1197         }
1198 done:
1199         rtnl_unlock();
1200 out:
1201         return ret;
1202 }
1203
1204 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1205 {
1206         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1207         struct in_ifaddr *ifa;
1208         struct ifreq ifr;
1209         int done = 0;
1210
1211         if (WARN_ON(size > sizeof(struct ifreq)))
1212                 goto out;
1213
1214         if (!in_dev)
1215                 goto out;
1216
1217         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1218                 if (!buf) {
1219                         done += size;
1220                         continue;
1221                 }
1222                 if (len < size)
1223                         break;
1224                 memset(&ifr, 0, sizeof(struct ifreq));
1225                 strcpy(ifr.ifr_name, ifa->ifa_label);
1226
1227                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1228                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1229                                                                 ifa->ifa_local;
1230
1231                 if (copy_to_user(buf + done, &ifr, size)) {
1232                         done = -EFAULT;
1233                         break;
1234                 }
1235                 len  -= size;
1236                 done += size;
1237         }
1238 out:
1239         return done;
1240 }
1241
1242 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1243                                  int scope)
1244 {
1245         for_primary_ifa(in_dev) {
1246                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1247                     ifa->ifa_scope <= scope)
1248                         return ifa->ifa_local;
1249         } endfor_ifa(in_dev);
1250
1251         return 0;
1252 }
1253
1254 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1255 {
1256         __be32 addr = 0;
1257         struct in_device *in_dev;
1258         struct net *net = dev_net(dev);
1259         int master_idx;
1260
1261         rcu_read_lock();
1262         in_dev = __in_dev_get_rcu(dev);
1263         if (!in_dev)
1264                 goto no_in_dev;
1265
1266         for_primary_ifa(in_dev) {
1267                 if (ifa->ifa_scope > scope)
1268                         continue;
1269                 if (!dst || inet_ifa_match(dst, ifa)) {
1270                         addr = ifa->ifa_local;
1271                         break;
1272                 }
1273                 if (!addr)
1274                         addr = ifa->ifa_local;
1275         } endfor_ifa(in_dev);
1276
1277         if (addr)
1278                 goto out_unlock;
1279 no_in_dev:
1280         master_idx = l3mdev_master_ifindex_rcu(dev);
1281
1282         /* For VRFs, the VRF device takes the place of the loopback device,
1283          * with addresses on it being preferred.  Note in such cases the
1284          * loopback device will be among the devices that fail the master_idx
1285          * equality check in the loop below.
1286          */
1287         if (master_idx &&
1288             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1289             (in_dev = __in_dev_get_rcu(dev))) {
1290                 addr = in_dev_select_addr(in_dev, scope);
1291                 if (addr)
1292                         goto out_unlock;
1293         }
1294
1295         /* Not loopback addresses on loopback should be preferred
1296            in this case. It is important that lo is the first interface
1297            in dev_base list.
1298          */
1299         for_each_netdev_rcu(net, dev) {
1300                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1301                         continue;
1302
1303                 in_dev = __in_dev_get_rcu(dev);
1304                 if (!in_dev)
1305                         continue;
1306
1307                 addr = in_dev_select_addr(in_dev, scope);
1308                 if (addr)
1309                         goto out_unlock;
1310         }
1311 out_unlock:
1312         rcu_read_unlock();
1313         return addr;
1314 }
1315 EXPORT_SYMBOL(inet_select_addr);
1316
1317 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1318                               __be32 local, int scope)
1319 {
1320         int same = 0;
1321         __be32 addr = 0;
1322
1323         for_ifa(in_dev) {
1324                 if (!addr &&
1325                     (local == ifa->ifa_local || !local) &&
1326                     ifa->ifa_scope <= scope) {
1327                         addr = ifa->ifa_local;
1328                         if (same)
1329                                 break;
1330                 }
1331                 if (!same) {
1332                         same = (!local || inet_ifa_match(local, ifa)) &&
1333                                 (!dst || inet_ifa_match(dst, ifa));
1334                         if (same && addr) {
1335                                 if (local || !dst)
1336                                         break;
1337                                 /* Is the selected addr into dst subnet? */
1338                                 if (inet_ifa_match(addr, ifa))
1339                                         break;
1340                                 /* No, then can we use new local src? */
1341                                 if (ifa->ifa_scope <= scope) {
1342                                         addr = ifa->ifa_local;
1343                                         break;
1344                                 }
1345                                 /* search for large dst subnet for addr */
1346                                 same = 0;
1347                         }
1348                 }
1349         } endfor_ifa(in_dev);
1350
1351         return same ? addr : 0;
1352 }
1353
1354 /*
1355  * Confirm that local IP address exists using wildcards:
1356  * - net: netns to check, cannot be NULL
1357  * - in_dev: only on this interface, NULL=any interface
1358  * - dst: only in the same subnet as dst, 0=any dst
1359  * - local: address, 0=autoselect the local address
1360  * - scope: maximum allowed scope value for the local address
1361  */
1362 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1363                          __be32 dst, __be32 local, int scope)
1364 {
1365         __be32 addr = 0;
1366         struct net_device *dev;
1367
1368         if (in_dev)
1369                 return confirm_addr_indev(in_dev, dst, local, scope);
1370
1371         rcu_read_lock();
1372         for_each_netdev_rcu(net, dev) {
1373                 in_dev = __in_dev_get_rcu(dev);
1374                 if (in_dev) {
1375                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1376                         if (addr)
1377                                 break;
1378                 }
1379         }
1380         rcu_read_unlock();
1381
1382         return addr;
1383 }
1384 EXPORT_SYMBOL(inet_confirm_addr);
1385
1386 /*
1387  *      Device notifier
1388  */
1389
1390 int register_inetaddr_notifier(struct notifier_block *nb)
1391 {
1392         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1393 }
1394 EXPORT_SYMBOL(register_inetaddr_notifier);
1395
1396 int unregister_inetaddr_notifier(struct notifier_block *nb)
1397 {
1398         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1399 }
1400 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1401
1402 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1403 {
1404         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1405 }
1406 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1407
1408 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1409 {
1410         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1411             nb);
1412 }
1413 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1414
1415 /* Rename ifa_labels for a device name change. Make some effort to preserve
1416  * existing alias numbering and to create unique labels if possible.
1417 */
1418 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1419 {
1420         struct in_ifaddr *ifa;
1421         int named = 0;
1422
1423         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1424                 char old[IFNAMSIZ], *dot;
1425
1426                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1427                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1428                 if (named++ == 0)
1429                         goto skip;
1430                 dot = strchr(old, ':');
1431                 if (!dot) {
1432                         sprintf(old, ":%d", named);
1433                         dot = old;
1434                 }
1435                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1436                         strcat(ifa->ifa_label, dot);
1437                 else
1438                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1439 skip:
1440                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1441         }
1442 }
1443
1444 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1445                                         struct in_device *in_dev)
1446
1447 {
1448         struct in_ifaddr *ifa;
1449
1450         for (ifa = in_dev->ifa_list; ifa;
1451              ifa = ifa->ifa_next) {
1452                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1453                          ifa->ifa_local, dev,
1454                          ifa->ifa_local, NULL,
1455                          dev->dev_addr, NULL);
1456         }
1457 }
1458
1459 /* Called only under RTNL semaphore */
1460
1461 static int inetdev_event(struct notifier_block *this, unsigned long event,
1462                          void *ptr)
1463 {
1464         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1465         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1466
1467         ASSERT_RTNL();
1468
1469         if (!in_dev) {
1470                 if (event == NETDEV_REGISTER) {
1471                         in_dev = inetdev_init(dev);
1472                         if (IS_ERR(in_dev))
1473                                 return notifier_from_errno(PTR_ERR(in_dev));
1474                         if (dev->flags & IFF_LOOPBACK) {
1475                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1476                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1477                         }
1478                 } else if (event == NETDEV_CHANGEMTU) {
1479                         /* Re-enabling IP */
1480                         if (inetdev_valid_mtu(dev->mtu))
1481                                 in_dev = inetdev_init(dev);
1482                 }
1483                 goto out;
1484         }
1485
1486         switch (event) {
1487         case NETDEV_REGISTER:
1488                 pr_debug("%s: bug\n", __func__);
1489                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1490                 break;
1491         case NETDEV_UP:
1492                 if (!inetdev_valid_mtu(dev->mtu))
1493                         break;
1494                 if (dev->flags & IFF_LOOPBACK) {
1495                         struct in_ifaddr *ifa = inet_alloc_ifa();
1496
1497                         if (ifa) {
1498                                 INIT_HLIST_NODE(&ifa->hash);
1499                                 ifa->ifa_local =
1500                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1501                                 ifa->ifa_prefixlen = 8;
1502                                 ifa->ifa_mask = inet_make_mask(8);
1503                                 in_dev_hold(in_dev);
1504                                 ifa->ifa_dev = in_dev;
1505                                 ifa->ifa_scope = RT_SCOPE_HOST;
1506                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1507                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1508                                                  INFINITY_LIFE_TIME);
1509                                 ipv4_devconf_setall(in_dev);
1510                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1511                                 inet_insert_ifa(ifa);
1512                         }
1513                 }
1514                 ip_mc_up(in_dev);
1515                 /* fall through */
1516         case NETDEV_CHANGEADDR:
1517                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1518                         break;
1519                 /* fall through */
1520         case NETDEV_NOTIFY_PEERS:
1521                 /* Send gratuitous ARP to notify of link change */
1522                 inetdev_send_gratuitous_arp(dev, in_dev);
1523                 break;
1524         case NETDEV_DOWN:
1525                 ip_mc_down(in_dev);
1526                 break;
1527         case NETDEV_PRE_TYPE_CHANGE:
1528                 ip_mc_unmap(in_dev);
1529                 break;
1530         case NETDEV_POST_TYPE_CHANGE:
1531                 ip_mc_remap(in_dev);
1532                 break;
1533         case NETDEV_CHANGEMTU:
1534                 if (inetdev_valid_mtu(dev->mtu))
1535                         break;
1536                 /* disable IP when MTU is not enough */
1537                 /* fall through */
1538         case NETDEV_UNREGISTER:
1539                 inetdev_destroy(in_dev);
1540                 break;
1541         case NETDEV_CHANGENAME:
1542                 /* Do not notify about label change, this event is
1543                  * not interesting to applications using netlink.
1544                  */
1545                 inetdev_changename(dev, in_dev);
1546
1547                 devinet_sysctl_unregister(in_dev);
1548                 devinet_sysctl_register(in_dev);
1549                 break;
1550         }
1551 out:
1552         return NOTIFY_DONE;
1553 }
1554
1555 static struct notifier_block ip_netdev_notifier = {
1556         .notifier_call = inetdev_event,
1557 };
1558
1559 static size_t inet_nlmsg_size(void)
1560 {
1561         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1562                + nla_total_size(4) /* IFA_ADDRESS */
1563                + nla_total_size(4) /* IFA_LOCAL */
1564                + nla_total_size(4) /* IFA_BROADCAST */
1565                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1566                + nla_total_size(4)  /* IFA_FLAGS */
1567                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1568                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1569 }
1570
1571 static inline u32 cstamp_delta(unsigned long cstamp)
1572 {
1573         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1574 }
1575
1576 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1577                          unsigned long tstamp, u32 preferred, u32 valid)
1578 {
1579         struct ifa_cacheinfo ci;
1580
1581         ci.cstamp = cstamp_delta(cstamp);
1582         ci.tstamp = cstamp_delta(tstamp);
1583         ci.ifa_prefered = preferred;
1584         ci.ifa_valid = valid;
1585
1586         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1587 }
1588
1589 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1590                             u32 portid, u32 seq, int event, unsigned int flags)
1591 {
1592         struct ifaddrmsg *ifm;
1593         struct nlmsghdr  *nlh;
1594         u32 preferred, valid;
1595
1596         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1597         if (!nlh)
1598                 return -EMSGSIZE;
1599
1600         ifm = nlmsg_data(nlh);
1601         ifm->ifa_family = AF_INET;
1602         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1603         ifm->ifa_flags = ifa->ifa_flags;
1604         ifm->ifa_scope = ifa->ifa_scope;
1605         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1606
1607         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1608                 preferred = ifa->ifa_preferred_lft;
1609                 valid = ifa->ifa_valid_lft;
1610                 if (preferred != INFINITY_LIFE_TIME) {
1611                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1612
1613                         if (preferred > tval)
1614                                 preferred -= tval;
1615                         else
1616                                 preferred = 0;
1617                         if (valid != INFINITY_LIFE_TIME) {
1618                                 if (valid > tval)
1619                                         valid -= tval;
1620                                 else
1621                                         valid = 0;
1622                         }
1623                 }
1624         } else {
1625                 preferred = INFINITY_LIFE_TIME;
1626                 valid = INFINITY_LIFE_TIME;
1627         }
1628         if ((ifa->ifa_address &&
1629              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1630             (ifa->ifa_local &&
1631              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1632             (ifa->ifa_broadcast &&
1633              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1634             (ifa->ifa_label[0] &&
1635              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1636             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1637             (ifa->ifa_rt_priority &&
1638              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1639             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1640                           preferred, valid))
1641                 goto nla_put_failure;
1642
1643         nlmsg_end(skb, nlh);
1644         return 0;
1645
1646 nla_put_failure:
1647         nlmsg_cancel(skb, nlh);
1648         return -EMSGSIZE;
1649 }
1650
1651 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1652 {
1653         struct net *net = sock_net(skb->sk);
1654         int h, s_h;
1655         int idx, s_idx;
1656         int ip_idx, s_ip_idx;
1657         struct net_device *dev;
1658         struct in_device *in_dev;
1659         struct in_ifaddr *ifa;
1660         struct hlist_head *head;
1661
1662         s_h = cb->args[0];
1663         s_idx = idx = cb->args[1];
1664         s_ip_idx = ip_idx = cb->args[2];
1665
1666         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1667                 idx = 0;
1668                 head = &net->dev_index_head[h];
1669                 rcu_read_lock();
1670                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1671                           net->dev_base_seq;
1672                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1673                         if (idx < s_idx)
1674                                 goto cont;
1675                         if (h > s_h || idx > s_idx)
1676                                 s_ip_idx = 0;
1677                         in_dev = __in_dev_get_rcu(dev);
1678                         if (!in_dev)
1679                                 goto cont;
1680
1681                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1682                              ifa = ifa->ifa_next, ip_idx++) {
1683                                 if (ip_idx < s_ip_idx)
1684                                         continue;
1685                                 if (inet_fill_ifaddr(skb, ifa,
1686                                              NETLINK_CB(cb->skb).portid,
1687                                              cb->nlh->nlmsg_seq,
1688                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1689                                         rcu_read_unlock();
1690                                         goto done;
1691                                 }
1692                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1693                         }
1694 cont:
1695                         idx++;
1696                 }
1697                 rcu_read_unlock();
1698         }
1699
1700 done:
1701         cb->args[0] = h;
1702         cb->args[1] = idx;
1703         cb->args[2] = ip_idx;
1704
1705         return skb->len;
1706 }
1707
1708 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1709                       u32 portid)
1710 {
1711         struct sk_buff *skb;
1712         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1713         int err = -ENOBUFS;
1714         struct net *net;
1715
1716         net = dev_net(ifa->ifa_dev->dev);
1717         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1718         if (!skb)
1719                 goto errout;
1720
1721         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1722         if (err < 0) {
1723                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1724                 WARN_ON(err == -EMSGSIZE);
1725                 kfree_skb(skb);
1726                 goto errout;
1727         }
1728         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1729         return;
1730 errout:
1731         if (err < 0)
1732                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1733 }
1734
1735 static size_t inet_get_link_af_size(const struct net_device *dev,
1736                                     u32 ext_filter_mask)
1737 {
1738         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1739
1740         if (!in_dev)
1741                 return 0;
1742
1743         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1744 }
1745
1746 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1747                              u32 ext_filter_mask)
1748 {
1749         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1750         struct nlattr *nla;
1751         int i;
1752
1753         if (!in_dev)
1754                 return -ENODATA;
1755
1756         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1757         if (!nla)
1758                 return -EMSGSIZE;
1759
1760         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1761                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1762
1763         return 0;
1764 }
1765
1766 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1767         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1768 };
1769
1770 static int inet_validate_link_af(const struct net_device *dev,
1771                                  const struct nlattr *nla)
1772 {
1773         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1774         int err, rem;
1775
1776         if (dev && !__in_dev_get_rcu(dev))
1777                 return -EAFNOSUPPORT;
1778
1779         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1780         if (err < 0)
1781                 return err;
1782
1783         if (tb[IFLA_INET_CONF]) {
1784                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1785                         int cfgid = nla_type(a);
1786
1787                         if (nla_len(a) < 4)
1788                                 return -EINVAL;
1789
1790                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1791                                 return -EINVAL;
1792                 }
1793         }
1794
1795         return 0;
1796 }
1797
1798 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1799 {
1800         struct in_device *in_dev = __in_dev_get_rcu(dev);
1801         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1802         int rem;
1803
1804         if (!in_dev)
1805                 return -EAFNOSUPPORT;
1806
1807         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1808                 BUG();
1809
1810         if (tb[IFLA_INET_CONF]) {
1811                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1812                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1813         }
1814
1815         return 0;
1816 }
1817
1818 static int inet_netconf_msgsize_devconf(int type)
1819 {
1820         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1821                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1822         bool all = false;
1823
1824         if (type == NETCONFA_ALL)
1825                 all = true;
1826
1827         if (all || type == NETCONFA_FORWARDING)
1828                 size += nla_total_size(4);
1829         if (all || type == NETCONFA_RP_FILTER)
1830                 size += nla_total_size(4);
1831         if (all || type == NETCONFA_MC_FORWARDING)
1832                 size += nla_total_size(4);
1833         if (all || type == NETCONFA_BC_FORWARDING)
1834                 size += nla_total_size(4);
1835         if (all || type == NETCONFA_PROXY_NEIGH)
1836                 size += nla_total_size(4);
1837         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1838                 size += nla_total_size(4);
1839
1840         return size;
1841 }
1842
1843 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1844                                      struct ipv4_devconf *devconf, u32 portid,
1845                                      u32 seq, int event, unsigned int flags,
1846                                      int type)
1847 {
1848         struct nlmsghdr  *nlh;
1849         struct netconfmsg *ncm;
1850         bool all = false;
1851
1852         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1853                         flags);
1854         if (!nlh)
1855                 return -EMSGSIZE;
1856
1857         if (type == NETCONFA_ALL)
1858                 all = true;
1859
1860         ncm = nlmsg_data(nlh);
1861         ncm->ncm_family = AF_INET;
1862
1863         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1864                 goto nla_put_failure;
1865
1866         if (!devconf)
1867                 goto out;
1868
1869         if ((all || type == NETCONFA_FORWARDING) &&
1870             nla_put_s32(skb, NETCONFA_FORWARDING,
1871                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1872                 goto nla_put_failure;
1873         if ((all || type == NETCONFA_RP_FILTER) &&
1874             nla_put_s32(skb, NETCONFA_RP_FILTER,
1875                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1876                 goto nla_put_failure;
1877         if ((all || type == NETCONFA_MC_FORWARDING) &&
1878             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1879                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1880                 goto nla_put_failure;
1881         if ((all || type == NETCONFA_BC_FORWARDING) &&
1882             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
1883                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
1884                 goto nla_put_failure;
1885         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1886             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1887                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1888                 goto nla_put_failure;
1889         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1890             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1891                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1892                 goto nla_put_failure;
1893
1894 out:
1895         nlmsg_end(skb, nlh);
1896         return 0;
1897
1898 nla_put_failure:
1899         nlmsg_cancel(skb, nlh);
1900         return -EMSGSIZE;
1901 }
1902
1903 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1904                                  int ifindex, struct ipv4_devconf *devconf)
1905 {
1906         struct sk_buff *skb;
1907         int err = -ENOBUFS;
1908
1909         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1910         if (!skb)
1911                 goto errout;
1912
1913         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1914                                         event, 0, type);
1915         if (err < 0) {
1916                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1917                 WARN_ON(err == -EMSGSIZE);
1918                 kfree_skb(skb);
1919                 goto errout;
1920         }
1921         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1922         return;
1923 errout:
1924         if (err < 0)
1925                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1926 }
1927
1928 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1929         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1930         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1931         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1932         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1933         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1934 };
1935
1936 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1937                                     struct nlmsghdr *nlh,
1938                                     struct netlink_ext_ack *extack)
1939 {
1940         struct net *net = sock_net(in_skb->sk);
1941         struct nlattr *tb[NETCONFA_MAX+1];
1942         struct netconfmsg *ncm;
1943         struct sk_buff *skb;
1944         struct ipv4_devconf *devconf;
1945         struct in_device *in_dev;
1946         struct net_device *dev;
1947         int ifindex;
1948         int err;
1949
1950         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1951                           devconf_ipv4_policy, extack);
1952         if (err < 0)
1953                 goto errout;
1954
1955         err = -EINVAL;
1956         if (!tb[NETCONFA_IFINDEX])
1957                 goto errout;
1958
1959         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1960         switch (ifindex) {
1961         case NETCONFA_IFINDEX_ALL:
1962                 devconf = net->ipv4.devconf_all;
1963                 break;
1964         case NETCONFA_IFINDEX_DEFAULT:
1965                 devconf = net->ipv4.devconf_dflt;
1966                 break;
1967         default:
1968                 dev = __dev_get_by_index(net, ifindex);
1969                 if (!dev)
1970                         goto errout;
1971                 in_dev = __in_dev_get_rtnl(dev);
1972                 if (!in_dev)
1973                         goto errout;
1974                 devconf = &in_dev->cnf;
1975                 break;
1976         }
1977
1978         err = -ENOBUFS;
1979         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1980         if (!skb)
1981                 goto errout;
1982
1983         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1984                                         NETLINK_CB(in_skb).portid,
1985                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1986                                         NETCONFA_ALL);
1987         if (err < 0) {
1988                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1989                 WARN_ON(err == -EMSGSIZE);
1990                 kfree_skb(skb);
1991                 goto errout;
1992         }
1993         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1994 errout:
1995         return err;
1996 }
1997
1998 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1999                                      struct netlink_callback *cb)
2000 {
2001         struct net *net = sock_net(skb->sk);
2002         int h, s_h;
2003         int idx, s_idx;
2004         struct net_device *dev;
2005         struct in_device *in_dev;
2006         struct hlist_head *head;
2007
2008         s_h = cb->args[0];
2009         s_idx = idx = cb->args[1];
2010
2011         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2012                 idx = 0;
2013                 head = &net->dev_index_head[h];
2014                 rcu_read_lock();
2015                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2016                           net->dev_base_seq;
2017                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2018                         if (idx < s_idx)
2019                                 goto cont;
2020                         in_dev = __in_dev_get_rcu(dev);
2021                         if (!in_dev)
2022                                 goto cont;
2023
2024                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2025                                                       &in_dev->cnf,
2026                                                       NETLINK_CB(cb->skb).portid,
2027                                                       cb->nlh->nlmsg_seq,
2028                                                       RTM_NEWNETCONF,
2029                                                       NLM_F_MULTI,
2030                                                       NETCONFA_ALL) < 0) {
2031                                 rcu_read_unlock();
2032                                 goto done;
2033                         }
2034                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2035 cont:
2036                         idx++;
2037                 }
2038                 rcu_read_unlock();
2039         }
2040         if (h == NETDEV_HASHENTRIES) {
2041                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2042                                               net->ipv4.devconf_all,
2043                                               NETLINK_CB(cb->skb).portid,
2044                                               cb->nlh->nlmsg_seq,
2045                                               RTM_NEWNETCONF, NLM_F_MULTI,
2046                                               NETCONFA_ALL) < 0)
2047                         goto done;
2048                 else
2049                         h++;
2050         }
2051         if (h == NETDEV_HASHENTRIES + 1) {
2052                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2053                                               net->ipv4.devconf_dflt,
2054                                               NETLINK_CB(cb->skb).portid,
2055                                               cb->nlh->nlmsg_seq,
2056                                               RTM_NEWNETCONF, NLM_F_MULTI,
2057                                               NETCONFA_ALL) < 0)
2058                         goto done;
2059                 else
2060                         h++;
2061         }
2062 done:
2063         cb->args[0] = h;
2064         cb->args[1] = idx;
2065
2066         return skb->len;
2067 }
2068
2069 #ifdef CONFIG_SYSCTL
2070
2071 static void devinet_copy_dflt_conf(struct net *net, int i)
2072 {
2073         struct net_device *dev;
2074
2075         rcu_read_lock();
2076         for_each_netdev_rcu(net, dev) {
2077                 struct in_device *in_dev;
2078
2079                 in_dev = __in_dev_get_rcu(dev);
2080                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2081                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2082         }
2083         rcu_read_unlock();
2084 }
2085
2086 /* called with RTNL locked */
2087 static void inet_forward_change(struct net *net)
2088 {
2089         struct net_device *dev;
2090         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2091
2092         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2093         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2094         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2095                                     NETCONFA_FORWARDING,
2096                                     NETCONFA_IFINDEX_ALL,
2097                                     net->ipv4.devconf_all);
2098         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2099                                     NETCONFA_FORWARDING,
2100                                     NETCONFA_IFINDEX_DEFAULT,
2101                                     net->ipv4.devconf_dflt);
2102
2103         for_each_netdev(net, dev) {
2104                 struct in_device *in_dev;
2105
2106                 if (on)
2107                         dev_disable_lro(dev);
2108
2109                 in_dev = __in_dev_get_rtnl(dev);
2110                 if (in_dev) {
2111                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2112                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2113                                                     NETCONFA_FORWARDING,
2114                                                     dev->ifindex, &in_dev->cnf);
2115                 }
2116         }
2117 }
2118
2119 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2120 {
2121         if (cnf == net->ipv4.devconf_dflt)
2122                 return NETCONFA_IFINDEX_DEFAULT;
2123         else if (cnf == net->ipv4.devconf_all)
2124                 return NETCONFA_IFINDEX_ALL;
2125         else {
2126                 struct in_device *idev
2127                         = container_of(cnf, struct in_device, cnf);
2128                 return idev->dev->ifindex;
2129         }
2130 }
2131
2132 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2133                              void __user *buffer,
2134                              size_t *lenp, loff_t *ppos)
2135 {
2136         int old_value = *(int *)ctl->data;
2137         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2138         int new_value = *(int *)ctl->data;
2139
2140         if (write) {
2141                 struct ipv4_devconf *cnf = ctl->extra1;
2142                 struct net *net = ctl->extra2;
2143                 int i = (int *)ctl->data - cnf->data;
2144                 int ifindex;
2145
2146                 set_bit(i, cnf->state);
2147
2148                 if (cnf == net->ipv4.devconf_dflt)
2149                         devinet_copy_dflt_conf(net, i);
2150                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2151                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2152                         if ((new_value == 0) && (old_value != 0))
2153                                 rt_cache_flush(net);
2154
2155                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2156                     new_value != old_value)
2157                         rt_cache_flush(net);
2158
2159                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2160                     new_value != old_value) {
2161                         ifindex = devinet_conf_ifindex(net, cnf);
2162                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2163                                                     NETCONFA_RP_FILTER,
2164                                                     ifindex, cnf);
2165                 }
2166                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2167                     new_value != old_value) {
2168                         ifindex = devinet_conf_ifindex(net, cnf);
2169                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2170                                                     NETCONFA_PROXY_NEIGH,
2171                                                     ifindex, cnf);
2172                 }
2173                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2174                     new_value != old_value) {
2175                         ifindex = devinet_conf_ifindex(net, cnf);
2176                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2177                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2178                                                     ifindex, cnf);
2179                 }
2180         }
2181
2182         return ret;
2183 }
2184
2185 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2186                                   void __user *buffer,
2187                                   size_t *lenp, loff_t *ppos)
2188 {
2189         int *valp = ctl->data;
2190         int val = *valp;
2191         loff_t pos = *ppos;
2192         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2193
2194         if (write && *valp != val) {
2195                 struct net *net = ctl->extra2;
2196
2197                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2198                         if (!rtnl_trylock()) {
2199                                 /* Restore the original values before restarting */
2200                                 *valp = val;
2201                                 *ppos = pos;
2202                                 return restart_syscall();
2203                         }
2204                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2205                                 inet_forward_change(net);
2206                         } else {
2207                                 struct ipv4_devconf *cnf = ctl->extra1;
2208                                 struct in_device *idev =
2209                                         container_of(cnf, struct in_device, cnf);
2210                                 if (*valp)
2211                                         dev_disable_lro(idev->dev);
2212                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2213                                                             NETCONFA_FORWARDING,
2214                                                             idev->dev->ifindex,
2215                                                             cnf);
2216                         }
2217                         rtnl_unlock();
2218                         rt_cache_flush(net);
2219                 } else
2220                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2221                                                     NETCONFA_FORWARDING,
2222                                                     NETCONFA_IFINDEX_DEFAULT,
2223                                                     net->ipv4.devconf_dflt);
2224         }
2225
2226         return ret;
2227 }
2228
2229 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2230                                 void __user *buffer,
2231                                 size_t *lenp, loff_t *ppos)
2232 {
2233         int *valp = ctl->data;
2234         int val = *valp;
2235         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2236         struct net *net = ctl->extra2;
2237
2238         if (write && *valp != val)
2239                 rt_cache_flush(net);
2240
2241         return ret;
2242 }
2243
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data points at the attribute's slot in the global ipv4_devconf and
 * .extra1 at ipv4_devconf itself; @name is the procname, @mval the file
 * mode and @proc the handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
2266
2267 static struct devinet_sysctl_table {
2268         struct ctl_table_header *sysctl_header;
2269         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2270 } devinet_sysctl = {
2271         .devinet_vars = {
2272                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2273                                              devinet_sysctl_forward),
2274                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2275                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2276
2277                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2278                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2279                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2280                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2281                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2282                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2283                                         "accept_source_route"),
2284                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2285                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2286                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2287                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2288                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2289                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2290                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2291                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2292                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2293                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2294                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2295                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2296                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2297                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2298                                         "force_igmp_version"),
2299                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2300                                         "igmpv2_unsolicited_report_interval"),
2301                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2302                                         "igmpv3_unsolicited_report_interval"),
2303                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2304                                         "ignore_routes_with_linkdown"),
2305                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2306                                         "drop_gratuitous_arp"),
2307
2308                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2309                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2310                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2311                                               "promote_secondaries"),
2312                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2313                                               "route_localnet"),
2314                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2315                                               "drop_unicast_in_l2_multicast"),
2316         },
2317 };
2318
2319 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2320                                      int ifindex, struct ipv4_devconf *p)
2321 {
2322         int i;
2323         struct devinet_sysctl_table *t;
2324         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2325
2326         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2327         if (!t)
2328                 goto out;
2329
2330         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2331                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2332                 t->devinet_vars[i].extra1 = p;
2333                 t->devinet_vars[i].extra2 = net;
2334         }
2335
2336         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2337
2338         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2339         if (!t->sysctl_header)
2340                 goto free;
2341
2342         p->sysctl = t;
2343
2344         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2345                                     ifindex, p);
2346         return 0;
2347
2348 free:
2349         kfree(t);
2350 out:
2351         return -ENOBUFS;
2352 }
2353
2354 static void __devinet_sysctl_unregister(struct net *net,
2355                                         struct ipv4_devconf *cnf, int ifindex)
2356 {
2357         struct devinet_sysctl_table *t = cnf->sysctl;
2358
2359         if (t) {
2360                 cnf->sysctl = NULL;
2361                 unregister_net_sysctl_table(t->sysctl_header);
2362                 kfree(t);
2363         }
2364
2365         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2366 }
2367
2368 static int devinet_sysctl_register(struct in_device *idev)
2369 {
2370         int err;
2371
2372         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2373                 return -EINVAL;
2374
2375         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2376         if (err)
2377                 return err;
2378         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2379                                         idev->dev->ifindex, &idev->cnf);
2380         if (err)
2381                 neigh_sysctl_unregister(idev->arp_parms);
2382         return err;
2383 }
2384
2385 static void devinet_sysctl_unregister(struct in_device *idev)
2386 {
2387         struct net *net = dev_net(idev->dev);
2388
2389         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2390         neigh_sysctl_unregister(idev->arp_parms);
2391 }
2392
2393 static struct ctl_table ctl_forward_entry[] = {
2394         {
2395                 .procname       = "ip_forward",
2396                 .data           = &ipv4_devconf.data[
2397                                         IPV4_DEVCONF_FORWARDING - 1],
2398                 .maxlen         = sizeof(int),
2399                 .mode           = 0644,
2400                 .proc_handler   = devinet_sysctl_forward,
2401                 .extra1         = &ipv4_devconf,
2402                 .extra2         = &init_net,
2403         },
2404         { },
2405 };
2406 #endif
2407
2408 static __net_init int devinet_init_net(struct net *net)
2409 {
2410         int err;
2411         struct ipv4_devconf *all, *dflt;
2412 #ifdef CONFIG_SYSCTL
2413         struct ctl_table *tbl = ctl_forward_entry;
2414         struct ctl_table_header *forw_hdr;
2415 #endif
2416
2417         err = -ENOMEM;
2418         all = &ipv4_devconf;
2419         dflt = &ipv4_devconf_dflt;
2420
2421         if (!net_eq(net, &init_net)) {
2422                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2423                 if (!all)
2424                         goto err_alloc_all;
2425
2426                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2427                 if (!dflt)
2428                         goto err_alloc_dflt;
2429
2430 #ifdef CONFIG_SYSCTL
2431                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2432                 if (!tbl)
2433                         goto err_alloc_ctl;
2434
2435                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2436                 tbl[0].extra1 = all;
2437                 tbl[0].extra2 = net;
2438 #endif
2439         }
2440
2441 #ifdef CONFIG_SYSCTL
2442         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2443         if (err < 0)
2444                 goto err_reg_all;
2445
2446         err = __devinet_sysctl_register(net, "default",
2447                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2448         if (err < 0)
2449                 goto err_reg_dflt;
2450
2451         err = -ENOMEM;
2452         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2453         if (!forw_hdr)
2454                 goto err_reg_ctl;
2455         net->ipv4.forw_hdr = forw_hdr;
2456 #endif
2457
2458         net->ipv4.devconf_all = all;
2459         net->ipv4.devconf_dflt = dflt;
2460         return 0;
2461
2462 #ifdef CONFIG_SYSCTL
2463 err_reg_ctl:
2464         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2465 err_reg_dflt:
2466         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2467 err_reg_all:
2468         if (tbl != ctl_forward_entry)
2469                 kfree(tbl);
2470 err_alloc_ctl:
2471 #endif
2472         if (dflt != &ipv4_devconf_dflt)
2473                 kfree(dflt);
2474 err_alloc_dflt:
2475         if (all != &ipv4_devconf)
2476                 kfree(all);
2477 err_alloc_all:
2478         return err;
2479 }
2480
2481 static __net_exit void devinet_exit_net(struct net *net)
2482 {
2483 #ifdef CONFIG_SYSCTL
2484         struct ctl_table *tbl;
2485
2486         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2487         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2488         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2489                                     NETCONFA_IFINDEX_DEFAULT);
2490         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2491                                     NETCONFA_IFINDEX_ALL);
2492         kfree(tbl);
2493 #endif
2494         kfree(net->ipv4.devconf_dflt);
2495         kfree(net->ipv4.devconf_all);
2496 }
2497
2498 static __net_initdata struct pernet_operations devinet_ops = {
2499         .init = devinet_init_net,
2500         .exit = devinet_exit_net,
2501 };
2502
2503 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2504         .family           = AF_INET,
2505         .fill_link_af     = inet_fill_link_af,
2506         .get_link_af_size = inet_get_link_af_size,
2507         .validate_link_af = inet_validate_link_af,
2508         .set_link_af      = inet_set_link_af,
2509 };
2510
2511 void __init devinet_init(void)
2512 {
2513         int i;
2514
2515         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2516                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2517
2518         register_pernet_subsys(&devinet_ops);
2519
2520         register_gifconf(PF_INET, inet_gifconf);
2521         register_netdevice_notifier(&ip_netdev_notifier);
2522
2523         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2524
2525         rtnl_af_register(&inet_af_ops);
2526
2527         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2528         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2529         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2530         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2531                       inet_netconf_dump_devconf, 0);
2532 }