Merge branch 'master' of git://1984.lsi.us.es/nf
[platform/adaptation/renesas_rcar/renesas_kernel.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
/* Mutex serializing the IPVS sockopt paths; [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Reader/writer lock protecting the virtual service table. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* sysctl variables */
62
63 #ifdef CONFIG_IP_VS_DEBUG
/* Backing store for the IPVS debug level; statics start out as zero. */
static int sysctl_ip_vs_debug_level;

/* Return the current IPVS debug level. */
int ip_vs_get_debug_level(void)
{
        return sysctl_ip_vs_debug_level;
}
70 #endif
71
72
73 /*  Protos */
74 static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
77 #ifdef CONFIG_IP_VS_IPV6
78 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
79 static bool __ip_vs_addr_is_local_v6(struct net *net,
80                                      const struct in6_addr *addr)
81 {
82         struct flowi6 fl6 = {
83                 .daddr = *addr,
84         };
85         struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
86         bool is_local;
87
88         is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
89
90         dst_release(dst);
91         return is_local;
92 }
93 #endif
94
95 #ifdef CONFIG_SYSCTL
96 /*
97  *      update_defense_level is called from keventd and from sysctl,
98  *      so it needs to protect itself from softirqs
99  */
100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102         struct sysinfo i;
103         static int old_secure_tcp = 0;
104         int availmem;
105         int nomem;
106         int to_change = -1;
107
108         /* we only count free and buffered memory (in pages) */
109         si_meminfo(&i);
110         availmem = i.freeram + i.bufferram;
111         /* however in linux 2.5 the i.bufferram is total page cache size,
112            we need adjust it */
113         /* si_swapinfo(&i); */
114         /* availmem = availmem - (i.totalswap - i.freeswap); */
115
116         nomem = (availmem < ipvs->sysctl_amemthresh);
117
118         local_bh_disable();
119
120         /* drop_entry */
121         spin_lock(&ipvs->dropentry_lock);
122         switch (ipvs->sysctl_drop_entry) {
123         case 0:
124                 atomic_set(&ipvs->dropentry, 0);
125                 break;
126         case 1:
127                 if (nomem) {
128                         atomic_set(&ipvs->dropentry, 1);
129                         ipvs->sysctl_drop_entry = 2;
130                 } else {
131                         atomic_set(&ipvs->dropentry, 0);
132                 }
133                 break;
134         case 2:
135                 if (nomem) {
136                         atomic_set(&ipvs->dropentry, 1);
137                 } else {
138                         atomic_set(&ipvs->dropentry, 0);
139                         ipvs->sysctl_drop_entry = 1;
140                 };
141                 break;
142         case 3:
143                 atomic_set(&ipvs->dropentry, 1);
144                 break;
145         }
146         spin_unlock(&ipvs->dropentry_lock);
147
148         /* drop_packet */
149         spin_lock(&ipvs->droppacket_lock);
150         switch (ipvs->sysctl_drop_packet) {
151         case 0:
152                 ipvs->drop_rate = 0;
153                 break;
154         case 1:
155                 if (nomem) {
156                         ipvs->drop_rate = ipvs->drop_counter
157                                 = ipvs->sysctl_amemthresh /
158                                 (ipvs->sysctl_amemthresh-availmem);
159                         ipvs->sysctl_drop_packet = 2;
160                 } else {
161                         ipvs->drop_rate = 0;
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         ipvs->drop_rate = ipvs->drop_counter
167                                 = ipvs->sysctl_amemthresh /
168                                 (ipvs->sysctl_amemthresh-availmem);
169                 } else {
170                         ipvs->drop_rate = 0;
171                         ipvs->sysctl_drop_packet = 1;
172                 }
173                 break;
174         case 3:
175                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
176                 break;
177         }
178         spin_unlock(&ipvs->droppacket_lock);
179
180         /* secure_tcp */
181         spin_lock(&ipvs->securetcp_lock);
182         switch (ipvs->sysctl_secure_tcp) {
183         case 0:
184                 if (old_secure_tcp >= 2)
185                         to_change = 0;
186                 break;
187         case 1:
188                 if (nomem) {
189                         if (old_secure_tcp < 2)
190                                 to_change = 1;
191                         ipvs->sysctl_secure_tcp = 2;
192                 } else {
193                         if (old_secure_tcp >= 2)
194                                 to_change = 0;
195                 }
196                 break;
197         case 2:
198                 if (nomem) {
199                         if (old_secure_tcp < 2)
200                                 to_change = 1;
201                 } else {
202                         if (old_secure_tcp >= 2)
203                                 to_change = 0;
204                         ipvs->sysctl_secure_tcp = 1;
205                 }
206                 break;
207         case 3:
208                 if (old_secure_tcp < 2)
209                         to_change = 1;
210                 break;
211         }
212         old_secure_tcp = ipvs->sysctl_secure_tcp;
213         if (to_change >= 0)
214                 ip_vs_protocol_timeout_change(ipvs,
215                                               ipvs->sysctl_secure_tcp > 1);
216         spin_unlock(&ipvs->securetcp_lock);
217
218         local_bh_enable();
219 }
220
221
222 /*
223  *      Timer for checking the defense
224  */
225 #define DEFENSE_TIMER_PERIOD    1*HZ
226
227 static void defense_work_handler(struct work_struct *work)
228 {
229         struct netns_ipvs *ipvs =
230                 container_of(work, struct netns_ipvs, defense_work.work);
231
232         update_defense_level(ipvs);
233         if (atomic_read(&ipvs->dropentry))
234                 ip_vs_random_dropentry(ipvs->net);
235         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238
239 int
240 ip_vs_use_count_inc(void)
241 {
242         return try_module_get(THIS_MODULE);
243 }
244
245 void
246 ip_vs_use_count_dec(void)
247 {
248         module_put(THIS_MODULE);
249 }
250
251
252 /*
253  *      Hash table: for virtual service lookups
254  */
255 #define IP_VS_SVC_TAB_BITS 8
256 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258
259 /* the service table hashed by <protocol, addr, port> */
260 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261 /* the service table hashed by fwmark */
262 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
264
265 /*
266  *      Returns hash value for virtual service
267  */
268 static inline unsigned int
269 ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
270                   const union nf_inet_addr *addr, __be16 port)
271 {
272         register unsigned int porth = ntohs(port);
273         __be32 addr_fold = addr->ip;
274
275 #ifdef CONFIG_IP_VS_IPV6
276         if (af == AF_INET6)
277                 addr_fold = addr->ip6[0]^addr->ip6[1]^
278                             addr->ip6[2]^addr->ip6[3];
279 #endif
280         addr_fold ^= ((size_t)net>>8);
281
282         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
283                 & IP_VS_SVC_TAB_MASK;
284 }
285
286 /*
287  *      Returns hash value of fwmark for virtual service lookup
288  */
289 static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
290 {
291         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
292 }
293
294 /*
295  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
296  *      or in the ip_vs_svc_fwm_table by fwmark.
297  *      Should be called with locked tables.
298  */
299 static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 {
301         unsigned int hash;
302
303         if (svc->flags & IP_VS_SVC_F_HASHED) {
304                 pr_err("%s(): request for already hashed, called from %pF\n",
305                        __func__, __builtin_return_address(0));
306                 return 0;
307         }
308
309         if (svc->fwmark == 0) {
310                 /*
311                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
312                  */
313                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314                                          &svc->addr, svc->port);
315                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316         } else {
317                 /*
318                  *  Hash it by fwmark in svc_fwm_table
319                  */
320                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322         }
323
324         svc->flags |= IP_VS_SVC_F_HASHED;
325         /* increase its refcnt because it is referenced by the svc table */
326         atomic_inc(&svc->refcnt);
327         return 1;
328 }
329
330
331 /*
332  *      Unhashes a service from svc_table / svc_fwm_table.
333  *      Should be called with locked tables.
334  */
335 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336 {
337         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338                 pr_err("%s(): request for unhash flagged, called from %pF\n",
339                        __func__, __builtin_return_address(0));
340                 return 0;
341         }
342
343         if (svc->fwmark == 0) {
344                 /* Remove it from the svc_table table */
345                 list_del(&svc->s_list);
346         } else {
347                 /* Remove it from the svc_fwm_table table */
348                 list_del(&svc->f_list);
349         }
350
351         svc->flags &= ~IP_VS_SVC_F_HASHED;
352         atomic_dec(&svc->refcnt);
353         return 1;
354 }
355
356
357 /*
358  *      Get service by {netns, proto,addr,port} in the service table.
359  */
360 static inline struct ip_vs_service *
361 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
362                      const union nf_inet_addr *vaddr, __be16 vport)
363 {
364         unsigned int hash;
365         struct ip_vs_service *svc;
366
367         /* Check for "full" addressed entries */
368         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369
370         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371                 if ((svc->af == af)
372                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
373                     && (svc->port == vport)
374                     && (svc->protocol == protocol)
375                     && net_eq(svc->net, net)) {
376                         /* HIT */
377                         return svc;
378                 }
379         }
380
381         return NULL;
382 }
383
384
385 /*
386  *      Get service by {fwmark} in the service table.
387  */
388 static inline struct ip_vs_service *
389 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
390 {
391         unsigned int hash;
392         struct ip_vs_service *svc;
393
394         /* Check for fwmark addressed entries */
395         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396
397         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398                 if (svc->fwmark == fwmark && svc->af == af
399                     && net_eq(svc->net, net)) {
400                         /* HIT */
401                         return svc;
402                 }
403         }
404
405         return NULL;
406 }
407
408 struct ip_vs_service *
409 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
410                   const union nf_inet_addr *vaddr, __be16 vport)
411 {
412         struct ip_vs_service *svc;
413         struct netns_ipvs *ipvs = net_ipvs(net);
414
415         read_lock(&__ip_vs_svc_lock);
416
417         /*
418          *      Check the table hashed by fwmark first
419          */
420         if (fwmark) {
421                 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422                 if (svc)
423                         goto out;
424         }
425
426         /*
427          *      Check the table hashed by <protocol,addr,port>
428          *      for "full" addressed entries
429          */
430         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
431
432         if (svc == NULL
433             && protocol == IPPROTO_TCP
434             && atomic_read(&ipvs->ftpsvc_counter)
435             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
436                 /*
437                  * Check if ftp service entry exists, the packet
438                  * might belong to FTP data connections.
439                  */
440                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
441         }
442
443         if (svc == NULL
444             && atomic_read(&ipvs->nullsvc_counter)) {
445                 /*
446                  * Check if the catch-all port (port zero) exists
447                  */
448                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
449         }
450
451   out:
452         if (svc)
453                 atomic_inc(&svc->usecnt);
454         read_unlock(&__ip_vs_svc_lock);
455
456         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457                       fwmark, ip_vs_proto_name(protocol),
458                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
459                       svc ? "hit" : "not hit");
460
461         return svc;
462 }
463
464
465 static inline void
466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
467 {
468         atomic_inc(&svc->refcnt);
469         dest->svc = svc;
470 }
471
472 static void
473 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
474 {
475         struct ip_vs_service *svc = dest->svc;
476
477         dest->svc = NULL;
478         if (atomic_dec_and_test(&svc->refcnt)) {
479                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
480                               svc->fwmark,
481                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
482                               ntohs(svc->port), atomic_read(&svc->usecnt));
483                 free_percpu(svc->stats.cpustats);
484                 kfree(svc);
485         }
486 }
487
488
489 /*
490  *      Returns hash value for real service
491  */
492 static inline unsigned int ip_vs_rs_hashkey(int af,
493                                             const union nf_inet_addr *addr,
494                                             __be16 port)
495 {
496         register unsigned int porth = ntohs(port);
497         __be32 addr_fold = addr->ip;
498
499 #ifdef CONFIG_IP_VS_IPV6
500         if (af == AF_INET6)
501                 addr_fold = addr->ip6[0]^addr->ip6[1]^
502                             addr->ip6[2]^addr->ip6[3];
503 #endif
504
505         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506                 & IP_VS_RTAB_MASK;
507 }
508
509 /*
510  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
511  *      should be called with locked tables.
512  */
513 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514 {
515         unsigned int hash;
516
517         if (!list_empty(&dest->d_list)) {
518                 return 0;
519         }
520
521         /*
522          *      Hash by proto,addr,port,
523          *      which are the parameters of the real service.
524          */
525         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526
527         list_add(&dest->d_list, &ipvs->rs_table[hash]);
528
529         return 1;
530 }
531
532 /*
533  *      UNhashes ip_vs_dest from rs_table.
534  *      should be called with locked tables.
535  */
536 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537 {
538         /*
539          * Remove it from the rs_table table.
540          */
541         if (!list_empty(&dest->d_list)) {
542                 list_del(&dest->d_list);
543                 INIT_LIST_HEAD(&dest->d_list);
544         }
545
546         return 1;
547 }
548
549 /*
550  *      Lookup real service by <proto,addr,port> in the real service table.
551  */
552 struct ip_vs_dest *
553 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
554                           const union nf_inet_addr *daddr,
555                           __be16 dport)
556 {
557         struct netns_ipvs *ipvs = net_ipvs(net);
558         unsigned int hash;
559         struct ip_vs_dest *dest;
560
561         /*
562          *      Check for "full" addressed entries
563          *      Return the first found entry
564          */
565         hash = ip_vs_rs_hashkey(af, daddr, dport);
566
567         read_lock(&ipvs->rs_lock);
568         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
569                 if ((dest->af == af)
570                     && ip_vs_addr_equal(af, &dest->addr, daddr)
571                     && (dest->port == dport)
572                     && ((dest->protocol == protocol) ||
573                         dest->vfwmark)) {
574                         /* HIT */
575                         read_unlock(&ipvs->rs_lock);
576                         return dest;
577                 }
578         }
579         read_unlock(&ipvs->rs_lock);
580
581         return NULL;
582 }
583
584 /*
585  *      Lookup destination by {addr,port} in the given service
586  */
587 static struct ip_vs_dest *
588 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
589                   __be16 dport)
590 {
591         struct ip_vs_dest *dest;
592
593         /*
594          * Find the destination for the given service
595          */
596         list_for_each_entry(dest, &svc->destinations, n_list) {
597                 if ((dest->af == svc->af)
598                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
599                     && (dest->port == dport)) {
600                         /* HIT */
601                         return dest;
602                 }
603         }
604
605         return NULL;
606 }
607
608 /*
609  * Find destination by {daddr,dport,vaddr,protocol}
610  * Cretaed to be used in ip_vs_process_message() in
611  * the backup synchronization daemon. It finds the
612  * destination to be bound to the received connection
613  * on the backup.
614  *
615  * ip_vs_lookup_real_service() looked promissing, but
616  * seems not working as expected.
617  */
618 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
619                                    const union nf_inet_addr *daddr,
620                                    __be16 dport,
621                                    const union nf_inet_addr *vaddr,
622                                    __be16 vport, __u16 protocol, __u32 fwmark,
623                                    __u32 flags)
624 {
625         struct ip_vs_dest *dest;
626         struct ip_vs_service *svc;
627         __be16 port = dport;
628
629         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
630         if (!svc)
631                 return NULL;
632         if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
633                 port = 0;
634         dest = ip_vs_lookup_dest(svc, daddr, port);
635         if (!dest)
636                 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
637         if (dest)
638                 atomic_inc(&dest->refcnt);
639         ip_vs_service_put(svc);
640         return dest;
641 }
642
643 /*
644  *  Lookup dest by {svc,addr,port} in the destination trash.
645  *  The destination trash is used to hold the destinations that are removed
646  *  from the service table but are still referenced by some conn entries.
647  *  The reason to add the destination trash is when the dest is temporary
648  *  down (either by administrator or by monitor program), the dest can be
649  *  picked back from the trash, the remaining connections to the dest can
650  *  continue, and the counting information of the dest is also useful for
651  *  scheduling.
652  */
653 static struct ip_vs_dest *
654 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
655                      __be16 dport)
656 {
657         struct ip_vs_dest *dest, *nxt;
658         struct netns_ipvs *ipvs = net_ipvs(svc->net);
659
660         /*
661          * Find the destination in trash
662          */
663         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
664                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
665                               "dest->refcnt=%d\n",
666                               dest->vfwmark,
667                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
668                               ntohs(dest->port),
669                               atomic_read(&dest->refcnt));
670                 if (dest->af == svc->af &&
671                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
672                     dest->port == dport &&
673                     dest->vfwmark == svc->fwmark &&
674                     dest->protocol == svc->protocol &&
675                     (svc->fwmark ||
676                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
677                       dest->vport == svc->port))) {
678                         /* HIT */
679                         return dest;
680                 }
681
682                 /*
683                  * Try to purge the destination from trash if not referenced
684                  */
685                 if (atomic_read(&dest->refcnt) == 1) {
686                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
687                                       "from trash\n",
688                                       dest->vfwmark,
689                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
690                                       ntohs(dest->port));
691                         list_del(&dest->n_list);
692                         ip_vs_dst_reset(dest);
693                         __ip_vs_unbind_svc(dest);
694                         free_percpu(dest->stats.cpustats);
695                         kfree(dest);
696                 }
697         }
698
699         return NULL;
700 }
701
702
703 /*
704  *  Clean up all the destinations in the trash
705  *  Called by the ip_vs_control_cleanup()
706  *
707  *  When the ip_vs_control_clearup is activated by ipvs module exit,
708  *  the service tables must have been flushed and all the connections
709  *  are expired, and the refcnt of each destination in the trash must
710  *  be 1, so we simply release them here.
711  */
712 static void ip_vs_trash_cleanup(struct net *net)
713 {
714         struct ip_vs_dest *dest, *nxt;
715         struct netns_ipvs *ipvs = net_ipvs(net);
716
717         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
718                 list_del(&dest->n_list);
719                 ip_vs_dst_reset(dest);
720                 __ip_vs_unbind_svc(dest);
721                 free_percpu(dest->stats.cpustats);
722                 kfree(dest);
723         }
724 }
725
726 static void
727 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
728 {
729 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
730
731         spin_lock_bh(&src->lock);
732
733         IP_VS_SHOW_STATS_COUNTER(conns);
734         IP_VS_SHOW_STATS_COUNTER(inpkts);
735         IP_VS_SHOW_STATS_COUNTER(outpkts);
736         IP_VS_SHOW_STATS_COUNTER(inbytes);
737         IP_VS_SHOW_STATS_COUNTER(outbytes);
738
739         ip_vs_read_estimator(dst, src);
740
741         spin_unlock_bh(&src->lock);
742 }
743
744 static void
745 ip_vs_zero_stats(struct ip_vs_stats *stats)
746 {
747         spin_lock_bh(&stats->lock);
748
749         /* get current counters as zero point, rates are zeroed */
750
751 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
752
753         IP_VS_ZERO_STATS_COUNTER(conns);
754         IP_VS_ZERO_STATS_COUNTER(inpkts);
755         IP_VS_ZERO_STATS_COUNTER(outpkts);
756         IP_VS_ZERO_STATS_COUNTER(inbytes);
757         IP_VS_ZERO_STATS_COUNTER(outbytes);
758
759         ip_vs_zero_estimator(stats);
760
761         spin_unlock_bh(&stats->lock);
762 }
763
764 /*
765  *      Update a destination in the given service
766  */
767 static void
768 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769                     struct ip_vs_dest_user_kern *udest, int add)
770 {
771         struct netns_ipvs *ipvs = net_ipvs(svc->net);
772         int conn_flags;
773
774         /* set the weight and the flags */
775         atomic_set(&dest->weight, udest->weight);
776         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
777         conn_flags |= IP_VS_CONN_F_INACTIVE;
778
779         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
780         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
781                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
782         } else {
783                 /*
784                  *    Put the real service in rs_table if not present.
785                  *    For now only for NAT!
786                  */
787                 write_lock_bh(&ipvs->rs_lock);
788                 ip_vs_rs_hash(ipvs, dest);
789                 write_unlock_bh(&ipvs->rs_lock);
790         }
791         atomic_set(&dest->conn_flags, conn_flags);
792
793         /* bind the service */
794         if (!dest->svc) {
795                 __ip_vs_bind_svc(dest, svc);
796         } else {
797                 if (dest->svc != svc) {
798                         __ip_vs_unbind_svc(dest);
799                         ip_vs_zero_stats(&dest->stats);
800                         __ip_vs_bind_svc(dest, svc);
801                 }
802         }
803
804         /* set the dest status flags */
805         dest->flags |= IP_VS_DEST_F_AVAILABLE;
806
807         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
808                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
809         dest->u_threshold = udest->u_threshold;
810         dest->l_threshold = udest->l_threshold;
811
812         spin_lock_bh(&dest->dst_lock);
813         ip_vs_dst_reset(dest);
814         spin_unlock_bh(&dest->dst_lock);
815
816         if (add)
817                 ip_vs_start_estimator(svc->net, &dest->stats);
818
819         write_lock_bh(&__ip_vs_svc_lock);
820
821         /* Wait until all other svc users go away */
822         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
823
824         if (add) {
825                 list_add(&dest->n_list, &svc->destinations);
826                 svc->num_dests++;
827         }
828
829         /* call the update_service, because server weight may be changed */
830         if (svc->scheduler->update_service)
831                 svc->scheduler->update_service(svc);
832
833         write_unlock_bh(&__ip_vs_svc_lock);
834 }
835
836
837 /*
838  *      Create a destination for the given service
839  */
840 static int
841 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
842                struct ip_vs_dest **dest_p)
843 {
844         struct ip_vs_dest *dest;
845         unsigned int atype;
846
847         EnterFunction(2);
848
849 #ifdef CONFIG_IP_VS_IPV6
850         if (svc->af == AF_INET6) {
851                 atype = ipv6_addr_type(&udest->addr.in6);
852                 if ((!(atype & IPV6_ADDR_UNICAST) ||
853                         atype & IPV6_ADDR_LINKLOCAL) &&
854                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
855                         return -EINVAL;
856         } else
857 #endif
858         {
859                 atype = inet_addr_type(svc->net, udest->addr.ip);
860                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
861                         return -EINVAL;
862         }
863
864         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
865         if (dest == NULL)
866                 return -ENOMEM;
867
868         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
869         if (!dest->stats.cpustats)
870                 goto err_alloc;
871
872         dest->af = svc->af;
873         dest->protocol = svc->protocol;
874         dest->vaddr = svc->addr;
875         dest->vport = svc->port;
876         dest->vfwmark = svc->fwmark;
877         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
878         dest->port = udest->port;
879
880         atomic_set(&dest->activeconns, 0);
881         atomic_set(&dest->inactconns, 0);
882         atomic_set(&dest->persistconns, 0);
883         atomic_set(&dest->refcnt, 1);
884
885         INIT_LIST_HEAD(&dest->d_list);
886         spin_lock_init(&dest->dst_lock);
887         spin_lock_init(&dest->stats.lock);
888         __ip_vs_update_dest(svc, dest, udest, 1);
889
890         *dest_p = dest;
891
892         LeaveFunction(2);
893         return 0;
894
895 err_alloc:
896         kfree(dest);
897         return -ENOMEM;
898 }
899
900
901 /*
902  *      Add a destination into an existing service
903  */
904 static int
905 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
906 {
907         struct ip_vs_dest *dest;
908         union nf_inet_addr daddr;
909         __be16 dport = udest->port;
910         int ret;
911
912         EnterFunction(2);
913
914         if (udest->weight < 0) {
915                 pr_err("%s(): server weight less than zero\n", __func__);
916                 return -ERANGE;
917         }
918
919         if (udest->l_threshold > udest->u_threshold) {
920                 pr_err("%s(): lower threshold is higher than upper threshold\n",
921                         __func__);
922                 return -ERANGE;
923         }
924
925         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
926
927         /*
928          * Check if the dest already exists in the list
929          */
930         dest = ip_vs_lookup_dest(svc, &daddr, dport);
931
932         if (dest != NULL) {
933                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
934                 return -EEXIST;
935         }
936
937         /*
938          * Check if the dest already exists in the trash and
939          * is from the same service
940          */
941         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
942
943         if (dest != NULL) {
944                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
945                               "dest->refcnt=%d, service %u/%s:%u\n",
946                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
947                               atomic_read(&dest->refcnt),
948                               dest->vfwmark,
949                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
950                               ntohs(dest->vport));
951
952                 /*
953                  * Get the destination from the trash
954                  */
955                 list_del(&dest->n_list);
956
957                 __ip_vs_update_dest(svc, dest, udest, 1);
958                 ret = 0;
959         } else {
960                 /*
961                  * Allocate and initialize the dest structure
962                  */
963                 ret = ip_vs_new_dest(svc, udest, &dest);
964         }
965         LeaveFunction(2);
966
967         return ret;
968 }
969
970
971 /*
972  *      Edit a destination in the given service
973  */
974 static int
975 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
976 {
977         struct ip_vs_dest *dest;
978         union nf_inet_addr daddr;
979         __be16 dport = udest->port;
980
981         EnterFunction(2);
982
983         if (udest->weight < 0) {
984                 pr_err("%s(): server weight less than zero\n", __func__);
985                 return -ERANGE;
986         }
987
988         if (udest->l_threshold > udest->u_threshold) {
989                 pr_err("%s(): lower threshold is higher than upper threshold\n",
990                         __func__);
991                 return -ERANGE;
992         }
993
994         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
995
996         /*
997          *  Lookup the destination list
998          */
999         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1000
1001         if (dest == NULL) {
1002                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1003                 return -ENOENT;
1004         }
1005
1006         __ip_vs_update_dest(svc, dest, udest, 0);
1007         LeaveFunction(2);
1008
1009         return 0;
1010 }
1011
1012
1013 /*
1014  *      Delete a destination (must be already unlinked from the service)
1015  */
1016 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1017 {
1018         struct netns_ipvs *ipvs = net_ipvs(net);
1019
1020         ip_vs_stop_estimator(net, &dest->stats);
1021
1022         /*
1023          *  Remove it from the d-linked list with the real services.
1024          */
1025         write_lock_bh(&ipvs->rs_lock);
1026         ip_vs_rs_unhash(dest);
1027         write_unlock_bh(&ipvs->rs_lock);
1028
1029         /*
1030          *  Decrease the refcnt of the dest, and free the dest
1031          *  if nobody refers to it (refcnt=0). Otherwise, throw
1032          *  the destination into the trash.
1033          */
1034         if (atomic_dec_and_test(&dest->refcnt)) {
1035                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1036                               dest->vfwmark,
1037                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1038                               ntohs(dest->port));
1039                 ip_vs_dst_reset(dest);
1040                 /* simply decrease svc->refcnt here, let the caller check
1041                    and release the service if nobody refers to it.
1042                    Only user context can release destination and service,
1043                    and only one user context can update virtual service at a
1044                    time, so the operation here is OK */
1045                 atomic_dec(&dest->svc->refcnt);
1046                 free_percpu(dest->stats.cpustats);
1047                 kfree(dest);
1048         } else {
1049                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1050                               "dest->refcnt=%d\n",
1051                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1052                               ntohs(dest->port),
1053                               atomic_read(&dest->refcnt));
1054                 list_add(&dest->n_list, &ipvs->dest_trash);
1055                 atomic_inc(&dest->refcnt);
1056         }
1057 }
1058
1059
1060 /*
1061  *      Unlink a destination from the given service
1062  */
1063 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1064                                 struct ip_vs_dest *dest,
1065                                 int svcupd)
1066 {
1067         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1068
1069         /*
1070          *  Remove it from the d-linked destination list.
1071          */
1072         list_del(&dest->n_list);
1073         svc->num_dests--;
1074
1075         /*
1076          *  Call the update_service function of its scheduler
1077          */
1078         if (svcupd && svc->scheduler->update_service)
1079                         svc->scheduler->update_service(svc);
1080 }
1081
1082
1083 /*
1084  *      Delete a destination server in the given service
1085  */
1086 static int
1087 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1088 {
1089         struct ip_vs_dest *dest;
1090         __be16 dport = udest->port;
1091
1092         EnterFunction(2);
1093
1094         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1095
1096         if (dest == NULL) {
1097                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1098                 return -ENOENT;
1099         }
1100
1101         write_lock_bh(&__ip_vs_svc_lock);
1102
1103         /*
1104          *      Wait until all other svc users go away.
1105          */
1106         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1107
1108         /*
1109          *      Unlink dest from the service
1110          */
1111         __ip_vs_unlink_dest(svc, dest, 1);
1112
1113         write_unlock_bh(&__ip_vs_svc_lock);
1114
1115         /*
1116          *      Delete the destination
1117          */
1118         __ip_vs_del_dest(svc->net, dest);
1119
1120         LeaveFunction(2);
1121
1122         return 0;
1123 }
1124
1125
1126 /*
1127  *      Add a service into the service hash table
1128  */
1129 static int
1130 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1131                   struct ip_vs_service **svc_p)
1132 {
1133         int ret = 0;
1134         struct ip_vs_scheduler *sched = NULL;
1135         struct ip_vs_pe *pe = NULL;
1136         struct ip_vs_service *svc = NULL;
1137         struct netns_ipvs *ipvs = net_ipvs(net);
1138
1139         /* increase the module use count */
1140         ip_vs_use_count_inc();
1141
1142         /* Lookup the scheduler by 'u->sched_name' */
1143         sched = ip_vs_scheduler_get(u->sched_name);
1144         if (sched == NULL) {
1145                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1146                 ret = -ENOENT;
1147                 goto out_err;
1148         }
1149
1150         if (u->pe_name && *u->pe_name) {
1151                 pe = ip_vs_pe_getbyname(u->pe_name);
1152                 if (pe == NULL) {
1153                         pr_info("persistence engine module ip_vs_pe_%s "
1154                                 "not found\n", u->pe_name);
1155                         ret = -ENOENT;
1156                         goto out_err;
1157                 }
1158         }
1159
1160 #ifdef CONFIG_IP_VS_IPV6
1161         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1162                 ret = -EINVAL;
1163                 goto out_err;
1164         }
1165 #endif
1166
1167         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1168         if (svc == NULL) {
1169                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1170                 ret = -ENOMEM;
1171                 goto out_err;
1172         }
1173         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1174         if (!svc->stats.cpustats) {
1175                 ret = -ENOMEM;
1176                 goto out_err;
1177         }
1178
1179         /* I'm the first user of the service */
1180         atomic_set(&svc->usecnt, 0);
1181         atomic_set(&svc->refcnt, 0);
1182
1183         svc->af = u->af;
1184         svc->protocol = u->protocol;
1185         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1186         svc->port = u->port;
1187         svc->fwmark = u->fwmark;
1188         svc->flags = u->flags;
1189         svc->timeout = u->timeout * HZ;
1190         svc->netmask = u->netmask;
1191         svc->net = net;
1192
1193         INIT_LIST_HEAD(&svc->destinations);
1194         rwlock_init(&svc->sched_lock);
1195         spin_lock_init(&svc->stats.lock);
1196
1197         /* Bind the scheduler */
1198         ret = ip_vs_bind_scheduler(svc, sched);
1199         if (ret)
1200                 goto out_err;
1201         sched = NULL;
1202
1203         /* Bind the ct retriever */
1204         ip_vs_bind_pe(svc, pe);
1205         pe = NULL;
1206
1207         /* Update the virtual service counters */
1208         if (svc->port == FTPPORT)
1209                 atomic_inc(&ipvs->ftpsvc_counter);
1210         else if (svc->port == 0)
1211                 atomic_inc(&ipvs->nullsvc_counter);
1212
1213         ip_vs_start_estimator(net, &svc->stats);
1214
1215         /* Count only IPv4 services for old get/setsockopt interface */
1216         if (svc->af == AF_INET)
1217                 ipvs->num_services++;
1218
1219         /* Hash the service into the service table */
1220         write_lock_bh(&__ip_vs_svc_lock);
1221         ip_vs_svc_hash(svc);
1222         write_unlock_bh(&__ip_vs_svc_lock);
1223
1224         *svc_p = svc;
1225         /* Now there is a service - full throttle */
1226         ipvs->enable = 1;
1227         return 0;
1228
1229
1230  out_err:
1231         if (svc != NULL) {
1232                 ip_vs_unbind_scheduler(svc);
1233                 if (svc->inc) {
1234                         local_bh_disable();
1235                         ip_vs_app_inc_put(svc->inc);
1236                         local_bh_enable();
1237                 }
1238                 if (svc->stats.cpustats)
1239                         free_percpu(svc->stats.cpustats);
1240                 kfree(svc);
1241         }
1242         ip_vs_scheduler_put(sched);
1243         ip_vs_pe_put(pe);
1244
1245         /* decrease the module use count */
1246         ip_vs_use_count_dec();
1247
1248         return ret;
1249 }
1250
1251
1252 /*
1253  *      Edit a service and bind it with a new scheduler
1254  */
1255 static int
1256 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1257 {
1258         struct ip_vs_scheduler *sched, *old_sched;
1259         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1260         int ret = 0;
1261
1262         /*
1263          * Lookup the scheduler, by 'u->sched_name'
1264          */
1265         sched = ip_vs_scheduler_get(u->sched_name);
1266         if (sched == NULL) {
1267                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1268                 return -ENOENT;
1269         }
1270         old_sched = sched;
1271
1272         if (u->pe_name && *u->pe_name) {
1273                 pe = ip_vs_pe_getbyname(u->pe_name);
1274                 if (pe == NULL) {
1275                         pr_info("persistence engine module ip_vs_pe_%s "
1276                                 "not found\n", u->pe_name);
1277                         ret = -ENOENT;
1278                         goto out;
1279                 }
1280                 old_pe = pe;
1281         }
1282
1283 #ifdef CONFIG_IP_VS_IPV6
1284         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1285                 ret = -EINVAL;
1286                 goto out;
1287         }
1288 #endif
1289
1290         write_lock_bh(&__ip_vs_svc_lock);
1291
1292         /*
1293          * Wait until all other svc users go away.
1294          */
1295         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1296
1297         /*
1298          * Set the flags and timeout value
1299          */
1300         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1301         svc->timeout = u->timeout * HZ;
1302         svc->netmask = u->netmask;
1303
1304         old_sched = svc->scheduler;
1305         if (sched != old_sched) {
1306                 /*
1307                  * Unbind the old scheduler
1308                  */
1309                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1310                         old_sched = sched;
1311                         goto out_unlock;
1312                 }
1313
1314                 /*
1315                  * Bind the new scheduler
1316                  */
1317                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1318                         /*
1319                          * If ip_vs_bind_scheduler fails, restore the old
1320                          * scheduler.
1321                          * The main reason of failure is out of memory.
1322                          *
1323                          * The question is if the old scheduler can be
1324                          * restored all the time. TODO: if it cannot be
1325                          * restored some time, we must delete the service,
1326                          * otherwise the system may crash.
1327                          */
1328                         ip_vs_bind_scheduler(svc, old_sched);
1329                         old_sched = sched;
1330                         goto out_unlock;
1331                 }
1332         }
1333
1334         old_pe = svc->pe;
1335         if (pe != old_pe) {
1336                 ip_vs_unbind_pe(svc);
1337                 ip_vs_bind_pe(svc, pe);
1338         }
1339
1340 out_unlock:
1341         write_unlock_bh(&__ip_vs_svc_lock);
1342 out:
1343         ip_vs_scheduler_put(old_sched);
1344         ip_vs_pe_put(old_pe);
1345         return ret;
1346 }
1347
1348
1349 /*
1350  *      Delete a service from the service list
1351  *      - The service must be unlinked, unlocked and not referenced!
1352  *      - We are called under _bh lock
1353  */
1354 static void __ip_vs_del_service(struct ip_vs_service *svc)
1355 {
1356         struct ip_vs_dest *dest, *nxt;
1357         struct ip_vs_scheduler *old_sched;
1358         struct ip_vs_pe *old_pe;
1359         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1360
1361         pr_info("%s: enter\n", __func__);
1362
1363         /* Count only IPv4 services for old get/setsockopt interface */
1364         if (svc->af == AF_INET)
1365                 ipvs->num_services--;
1366
1367         ip_vs_stop_estimator(svc->net, &svc->stats);
1368
1369         /* Unbind scheduler */
1370         old_sched = svc->scheduler;
1371         ip_vs_unbind_scheduler(svc);
1372         ip_vs_scheduler_put(old_sched);
1373
1374         /* Unbind persistence engine */
1375         old_pe = svc->pe;
1376         ip_vs_unbind_pe(svc);
1377         ip_vs_pe_put(old_pe);
1378
1379         /* Unbind app inc */
1380         if (svc->inc) {
1381                 ip_vs_app_inc_put(svc->inc);
1382                 svc->inc = NULL;
1383         }
1384
1385         /*
1386          *    Unlink the whole destination list
1387          */
1388         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1389                 __ip_vs_unlink_dest(svc, dest, 0);
1390                 __ip_vs_del_dest(svc->net, dest);
1391         }
1392
1393         /*
1394          *    Update the virtual service counters
1395          */
1396         if (svc->port == FTPPORT)
1397                 atomic_dec(&ipvs->ftpsvc_counter);
1398         else if (svc->port == 0)
1399                 atomic_dec(&ipvs->nullsvc_counter);
1400
1401         /*
1402          *    Free the service if nobody refers to it
1403          */
1404         if (atomic_read(&svc->refcnt) == 0) {
1405                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1406                               svc->fwmark,
1407                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1408                               ntohs(svc->port), atomic_read(&svc->usecnt));
1409                 free_percpu(svc->stats.cpustats);
1410                 kfree(svc);
1411         }
1412
1413         /* decrease the module use count */
1414         ip_vs_use_count_dec();
1415 }
1416
1417 /*
1418  * Unlink a service from list and try to delete it if its refcnt reached 0
1419  */
1420 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1421 {
1422         /*
1423          * Unhash it from the service table
1424          */
1425         write_lock_bh(&__ip_vs_svc_lock);
1426
1427         ip_vs_svc_unhash(svc);
1428
1429         /*
1430          * Wait until all the svc users go away.
1431          */
1432         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1433
1434         __ip_vs_del_service(svc);
1435
1436         write_unlock_bh(&__ip_vs_svc_lock);
1437 }
1438
1439 /*
1440  *      Delete a service from the service list
1441  */
1442 static int ip_vs_del_service(struct ip_vs_service *svc)
1443 {
1444         if (svc == NULL)
1445                 return -EEXIST;
1446         ip_vs_unlink_service(svc);
1447
1448         return 0;
1449 }
1450
1451
1452 /*
1453  *      Flush all the virtual services
1454  */
1455 static int ip_vs_flush(struct net *net)
1456 {
1457         int idx;
1458         struct ip_vs_service *svc, *nxt;
1459
1460         /*
1461          * Flush the service table hashed by <netns,protocol,addr,port>
1462          */
1463         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1464                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1465                                          s_list) {
1466                         if (net_eq(svc->net, net))
1467                                 ip_vs_unlink_service(svc);
1468                 }
1469         }
1470
1471         /*
1472          * Flush the service table hashed by fwmark
1473          */
1474         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1475                 list_for_each_entry_safe(svc, nxt,
1476                                          &ip_vs_svc_fwm_table[idx], f_list) {
1477                         if (net_eq(svc->net, net))
1478                                 ip_vs_unlink_service(svc);
1479                 }
1480         }
1481
1482         return 0;
1483 }
1484
1485 /*
1486  *      Delete service by {netns} in the service table.
1487  *      Called by __ip_vs_cleanup()
1488  */
1489 void ip_vs_service_net_cleanup(struct net *net)
1490 {
1491         EnterFunction(2);
1492         /* Check for "full" addressed entries */
1493         mutex_lock(&__ip_vs_mutex);
1494         ip_vs_flush(net);
1495         mutex_unlock(&__ip_vs_mutex);
1496         LeaveFunction(2);
1497 }
1498 /*
1499  * Release dst hold by dst_cache
1500  */
1501 static inline void
1502 __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1503 {
1504         spin_lock_bh(&dest->dst_lock);
1505         if (dest->dst_cache && dest->dst_cache->dev == dev) {
1506                 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1507                               dev->name,
1508                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1509                               ntohs(dest->port),
1510                               atomic_read(&dest->refcnt));
1511                 ip_vs_dst_reset(dest);
1512         }
1513         spin_unlock_bh(&dest->dst_lock);
1514
1515 }
1516 /*
1517  * Netdev event receiver
1518  * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1519  * a device that is "unregister" it must be released.
1520  */
1521 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1522                             void *ptr)
1523 {
1524         struct net_device *dev = ptr;
1525         struct net *net = dev_net(dev);
1526         struct netns_ipvs *ipvs = net_ipvs(net);
1527         struct ip_vs_service *svc;
1528         struct ip_vs_dest *dest;
1529         unsigned int idx;
1530
1531         if (event != NETDEV_UNREGISTER || !ipvs)
1532                 return NOTIFY_DONE;
1533         IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1534         EnterFunction(2);
1535         mutex_lock(&__ip_vs_mutex);
1536         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1537                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1538                         if (net_eq(svc->net, net)) {
1539                                 list_for_each_entry(dest, &svc->destinations,
1540                                                     n_list) {
1541                                         __ip_vs_dev_reset(dest, dev);
1542                                 }
1543                         }
1544                 }
1545
1546                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1547                         if (net_eq(svc->net, net)) {
1548                                 list_for_each_entry(dest, &svc->destinations,
1549                                                     n_list) {
1550                                         __ip_vs_dev_reset(dest, dev);
1551                                 }
1552                         }
1553
1554                 }
1555         }
1556
1557         list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
1558                 __ip_vs_dev_reset(dest, dev);
1559         }
1560         mutex_unlock(&__ip_vs_mutex);
1561         LeaveFunction(2);
1562         return NOTIFY_DONE;
1563 }
1564
1565 /*
1566  *      Zero counters in a service or all services
1567  */
1568 static int ip_vs_zero_service(struct ip_vs_service *svc)
1569 {
1570         struct ip_vs_dest *dest;
1571
1572         write_lock_bh(&__ip_vs_svc_lock);
1573         list_for_each_entry(dest, &svc->destinations, n_list) {
1574                 ip_vs_zero_stats(&dest->stats);
1575         }
1576         ip_vs_zero_stats(&svc->stats);
1577         write_unlock_bh(&__ip_vs_svc_lock);
1578         return 0;
1579 }
1580
1581 static int ip_vs_zero_all(struct net *net)
1582 {
1583         int idx;
1584         struct ip_vs_service *svc;
1585
1586         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1587                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1588                         if (net_eq(svc->net, net))
1589                                 ip_vs_zero_service(svc);
1590                 }
1591         }
1592
1593         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1594                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1595                         if (net_eq(svc->net, net))
1596                                 ip_vs_zero_service(svc);
1597                 }
1598         }
1599
1600         ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1601         return 0;
1602 }
1603
1604 #ifdef CONFIG_SYSCTL
1605
/* Limits referenced through .extra1/.extra2 by the "sync_retries" entry. */
static int zero;
static int three = 3;
1608
1609 static int
1610 proc_do_defense_mode(ctl_table *table, int write,
1611                      void __user *buffer, size_t *lenp, loff_t *ppos)
1612 {
1613         struct net *net = current->nsproxy->net_ns;
1614         int *valp = table->data;
1615         int val = *valp;
1616         int rc;
1617
1618         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1619         if (write && (*valp != val)) {
1620                 if ((*valp < 0) || (*valp > 3)) {
1621                         /* Restore the correct value */
1622                         *valp = val;
1623                 } else {
1624                         update_defense_level(net_ipvs(net));
1625                 }
1626         }
1627         return rc;
1628 }
1629
1630 static int
1631 proc_do_sync_threshold(ctl_table *table, int write,
1632                        void __user *buffer, size_t *lenp, loff_t *ppos)
1633 {
1634         int *valp = table->data;
1635         int val[2];
1636         int rc;
1637
1638         /* backup the value first */
1639         memcpy(val, valp, sizeof(val));
1640
1641         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1642         if (write && (valp[0] < 0 || valp[1] < 0 ||
1643             (valp[0] >= valp[1] && valp[1]))) {
1644                 /* Restore the correct value */
1645                 memcpy(valp, val, sizeof(val));
1646         }
1647         return rc;
1648 }
1649
1650 static int
1651 proc_do_sync_mode(ctl_table *table, int write,
1652                      void __user *buffer, size_t *lenp, loff_t *ppos)
1653 {
1654         int *valp = table->data;
1655         int val = *valp;
1656         int rc;
1657
1658         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1659         if (write && (*valp != val)) {
1660                 if ((*valp < 0) || (*valp > 1)) {
1661                         /* Restore the correct value */
1662                         *valp = val;
1663                 }
1664         }
1665         return rc;
1666 }
1667
1668 static int
1669 proc_do_sync_ports(ctl_table *table, int write,
1670                    void __user *buffer, size_t *lenp, loff_t *ppos)
1671 {
1672         int *valp = table->data;
1673         int val = *valp;
1674         int rc;
1675
1676         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1677         if (write && (*valp != val)) {
1678                 if (*valp < 1 || !is_power_of_2(*valp)) {
1679                         /* Restore the correct value */
1680                         *valp = val;
1681                 }
1682         }
1683         return rc;
1684 }
1685
1686 /*
1687  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1688  *      Do not change order or insert new entries without
1689  *      align with netns init in ip_vs_control_net_init()
1690  */
1691
1692 static struct ctl_table vs_vars[] = {
1693         {
1694                 .procname       = "amemthresh",
1695                 .maxlen         = sizeof(int),
1696                 .mode           = 0644,
1697                 .proc_handler   = proc_dointvec,
1698         },
1699         {
1700                 .procname       = "am_droprate",
1701                 .maxlen         = sizeof(int),
1702                 .mode           = 0644,
1703                 .proc_handler   = proc_dointvec,
1704         },
1705         {
1706                 .procname       = "drop_entry",
1707                 .maxlen         = sizeof(int),
1708                 .mode           = 0644,
1709                 .proc_handler   = proc_do_defense_mode,
1710         },
1711         {
1712                 .procname       = "drop_packet",
1713                 .maxlen         = sizeof(int),
1714                 .mode           = 0644,
1715                 .proc_handler   = proc_do_defense_mode,
1716         },
1717 #ifdef CONFIG_IP_VS_NFCT
1718         {
1719                 .procname       = "conntrack",
1720                 .maxlen         = sizeof(int),
1721                 .mode           = 0644,
1722                 .proc_handler   = &proc_dointvec,
1723         },
1724 #endif
1725         {
1726                 .procname       = "secure_tcp",
1727                 .maxlen         = sizeof(int),
1728                 .mode           = 0644,
1729                 .proc_handler   = proc_do_defense_mode,
1730         },
1731         {
1732                 .procname       = "snat_reroute",
1733                 .maxlen         = sizeof(int),
1734                 .mode           = 0644,
1735                 .proc_handler   = &proc_dointvec,
1736         },
1737         {
1738                 .procname       = "sync_version",
1739                 .maxlen         = sizeof(int),
1740                 .mode           = 0644,
1741                 .proc_handler   = &proc_do_sync_mode,
1742         },
1743         {
1744                 .procname       = "sync_ports",
1745                 .maxlen         = sizeof(int),
1746                 .mode           = 0644,
1747                 .proc_handler   = &proc_do_sync_ports,
1748         },
1749         {
1750                 .procname       = "sync_qlen_max",
1751                 .maxlen         = sizeof(int),
1752                 .mode           = 0644,
1753                 .proc_handler   = proc_dointvec,
1754         },
1755         {
1756                 .procname       = "sync_sock_size",
1757                 .maxlen         = sizeof(int),
1758                 .mode           = 0644,
1759                 .proc_handler   = proc_dointvec,
1760         },
1761         {
1762                 .procname       = "cache_bypass",
1763                 .maxlen         = sizeof(int),
1764                 .mode           = 0644,
1765                 .proc_handler   = proc_dointvec,
1766         },
1767         {
1768                 .procname       = "expire_nodest_conn",
1769                 .maxlen         = sizeof(int),
1770                 .mode           = 0644,
1771                 .proc_handler   = proc_dointvec,
1772         },
1773         {
1774                 .procname       = "expire_quiescent_template",
1775                 .maxlen         = sizeof(int),
1776                 .mode           = 0644,
1777                 .proc_handler   = proc_dointvec,
1778         },
1779         {
1780                 .procname       = "sync_threshold",
1781                 .maxlen         =
1782                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1783                 .mode           = 0644,
1784                 .proc_handler   = proc_do_sync_threshold,
1785         },
1786         {
1787                 .procname       = "sync_refresh_period",
1788                 .maxlen         = sizeof(int),
1789                 .mode           = 0644,
1790                 .proc_handler   = proc_dointvec_jiffies,
1791         },
1792         {
1793                 .procname       = "sync_retries",
1794                 .maxlen         = sizeof(int),
1795                 .mode           = 0644,
1796                 .proc_handler   = proc_dointvec_minmax,
1797                 .extra1         = &zero,
1798                 .extra2         = &three,
1799         },
1800         {
1801                 .procname       = "nat_icmp_send",
1802                 .maxlen         = sizeof(int),
1803                 .mode           = 0644,
1804                 .proc_handler   = proc_dointvec,
1805         },
1806 #ifdef CONFIG_IP_VS_DEBUG
1807         {
1808                 .procname       = "debug_level",
1809                 .data           = &sysctl_ip_vs_debug_level,
1810                 .maxlen         = sizeof(int),
1811                 .mode           = 0644,
1812                 .proc_handler   = proc_dointvec,
1813         },
1814 #endif
1815 #if 0
1816         {
1817                 .procname       = "timeout_established",
1818                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1819                 .maxlen         = sizeof(int),
1820                 .mode           = 0644,
1821                 .proc_handler   = proc_dointvec_jiffies,
1822         },
1823         {
1824                 .procname       = "timeout_synsent",
1825                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1826                 .maxlen         = sizeof(int),
1827                 .mode           = 0644,
1828                 .proc_handler   = proc_dointvec_jiffies,
1829         },
1830         {
1831                 .procname       = "timeout_synrecv",
1832                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1833                 .maxlen         = sizeof(int),
1834                 .mode           = 0644,
1835                 .proc_handler   = proc_dointvec_jiffies,
1836         },
1837         {
1838                 .procname       = "timeout_finwait",
1839                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1840                 .maxlen         = sizeof(int),
1841                 .mode           = 0644,
1842                 .proc_handler   = proc_dointvec_jiffies,
1843         },
1844         {
1845                 .procname       = "timeout_timewait",
1846                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1847                 .maxlen         = sizeof(int),
1848                 .mode           = 0644,
1849                 .proc_handler   = proc_dointvec_jiffies,
1850         },
1851         {
1852                 .procname       = "timeout_close",
1853                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1854                 .maxlen         = sizeof(int),
1855                 .mode           = 0644,
1856                 .proc_handler   = proc_dointvec_jiffies,
1857         },
1858         {
1859                 .procname       = "timeout_closewait",
1860                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1861                 .maxlen         = sizeof(int),
1862                 .mode           = 0644,
1863                 .proc_handler   = proc_dointvec_jiffies,
1864         },
1865         {
1866                 .procname       = "timeout_lastack",
1867                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1868                 .maxlen         = sizeof(int),
1869                 .mode           = 0644,
1870                 .proc_handler   = proc_dointvec_jiffies,
1871         },
1872         {
1873                 .procname       = "timeout_listen",
1874                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1875                 .maxlen         = sizeof(int),
1876                 .mode           = 0644,
1877                 .proc_handler   = proc_dointvec_jiffies,
1878         },
1879         {
1880                 .procname       = "timeout_synack",
1881                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1882                 .maxlen         = sizeof(int),
1883                 .mode           = 0644,
1884                 .proc_handler   = proc_dointvec_jiffies,
1885         },
1886         {
1887                 .procname       = "timeout_udp",
1888                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1889                 .maxlen         = sizeof(int),
1890                 .mode           = 0644,
1891                 .proc_handler   = proc_dointvec_jiffies,
1892         },
1893         {
1894                 .procname       = "timeout_icmp",
1895                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1896                 .maxlen         = sizeof(int),
1897                 .mode           = 0644,
1898                 .proc_handler   = proc_dointvec_jiffies,
1899         },
1900 #endif
1901         { }
1902 };
1903
1904 #endif
1905
1906 #ifdef CONFIG_PROC_FS
1907
/*
 * Cursor for the service listing, stored in seq->private (its size is what
 * ip_vs_info_open() hands to seq_open_net()).
 */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* bucket of the entry returned last */
};
1913
1914 /*
1915  *      Write the contents of the VS rule table to a PROCfs file.
1916  *      (It is kept just for backward compatibility)
1917  */
1918 static inline const char *ip_vs_fwd_name(unsigned int flags)
1919 {
1920         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1921         case IP_VS_CONN_F_LOCALNODE:
1922                 return "Local";
1923         case IP_VS_CONN_F_TUNNEL:
1924                 return "Tunnel";
1925         case IP_VS_CONN_F_DROUTE:
1926                 return "Route";
1927         default:
1928                 return "Masq";
1929         }
1930 }
1931
1932
1933 /* Get the Nth entry in the two lists */
1934 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1935 {
1936         struct net *net = seq_file_net(seq);
1937         struct ip_vs_iter *iter = seq->private;
1938         int idx;
1939         struct ip_vs_service *svc;
1940
1941         /* look in hash by protocol */
1942         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1943                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1944                         if (net_eq(svc->net, net) && pos-- == 0) {
1945                                 iter->table = ip_vs_svc_table;
1946                                 iter->bucket = idx;
1947                                 return svc;
1948                         }
1949                 }
1950         }
1951
1952         /* keep looking in fwmark */
1953         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1954                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1955                         if (net_eq(svc->net, net) && pos-- == 0) {
1956                                 iter->table = ip_vs_svc_fwm_table;
1957                                 iter->bucket = idx;
1958                                 return svc;
1959                         }
1960                 }
1961         }
1962
1963         return NULL;
1964 }
1965
1966 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1967 __acquires(__ip_vs_svc_lock)
1968 {
1969
1970         read_lock_bh(&__ip_vs_svc_lock);
1971         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1972 }
1973
1974
1975 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1976 {
1977         struct list_head *e;
1978         struct ip_vs_iter *iter;
1979         struct ip_vs_service *svc;
1980
1981         ++*pos;
1982         if (v == SEQ_START_TOKEN)
1983                 return ip_vs_info_array(seq,0);
1984
1985         svc = v;
1986         iter = seq->private;
1987
1988         if (iter->table == ip_vs_svc_table) {
1989                 /* next service in table hashed by protocol */
1990                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1991                         return list_entry(e, struct ip_vs_service, s_list);
1992
1993
1994                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1995                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1996                                             s_list) {
1997                                 return svc;
1998                         }
1999                 }
2000
2001                 iter->table = ip_vs_svc_fwm_table;
2002                 iter->bucket = -1;
2003                 goto scan_fwmark;
2004         }
2005
2006         /* next service in hashed by fwmark */
2007         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2008                 return list_entry(e, struct ip_vs_service, f_list);
2009
2010  scan_fwmark:
2011         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2012                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2013                                     f_list)
2014                         return svc;
2015         }
2016
2017         return NULL;
2018 }
2019
/* seq_file stop callback: drop the read lock taken in ip_vs_info_seq_start(). */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
2025
2026
2027 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2028 {
2029         if (v == SEQ_START_TOKEN) {
2030                 seq_printf(seq,
2031                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
2032                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2033                 seq_puts(seq,
2034                          "Prot LocalAddress:Port Scheduler Flags\n");
2035                 seq_puts(seq,
2036                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2037         } else {
2038                 const struct ip_vs_service *svc = v;
2039                 const struct ip_vs_iter *iter = seq->private;
2040                 const struct ip_vs_dest *dest;
2041
2042                 if (iter->table == ip_vs_svc_table) {
2043 #ifdef CONFIG_IP_VS_IPV6
2044                         if (svc->af == AF_INET6)
2045                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
2046                                            ip_vs_proto_name(svc->protocol),
2047                                            &svc->addr.in6,
2048                                            ntohs(svc->port),
2049                                            svc->scheduler->name);
2050                         else
2051 #endif
2052                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
2053                                            ip_vs_proto_name(svc->protocol),
2054                                            ntohl(svc->addr.ip),
2055                                            ntohs(svc->port),
2056                                            svc->scheduler->name,
2057                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2058                 } else {
2059                         seq_printf(seq, "FWM  %08X %s %s",
2060                                    svc->fwmark, svc->scheduler->name,
2061                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2062                 }
2063
2064                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2065                         seq_printf(seq, "persistent %d %08X\n",
2066                                 svc->timeout,
2067                                 ntohl(svc->netmask));
2068                 else
2069                         seq_putc(seq, '\n');
2070
2071                 list_for_each_entry(dest, &svc->destinations, n_list) {
2072 #ifdef CONFIG_IP_VS_IPV6
2073                         if (dest->af == AF_INET6)
2074                                 seq_printf(seq,
2075                                            "  -> [%pI6]:%04X"
2076                                            "      %-7s %-6d %-10d %-10d\n",
2077                                            &dest->addr.in6,
2078                                            ntohs(dest->port),
2079                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2080                                            atomic_read(&dest->weight),
2081                                            atomic_read(&dest->activeconns),
2082                                            atomic_read(&dest->inactconns));
2083                         else
2084 #endif
2085                                 seq_printf(seq,
2086                                            "  -> %08X:%04X      "
2087                                            "%-7s %-6d %-10d %-10d\n",
2088                                            ntohl(dest->addr.ip),
2089                                            ntohs(dest->port),
2090                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2091                                            atomic_read(&dest->weight),
2092                                            atomic_read(&dest->activeconns),
2093                                            atomic_read(&dest->inactconns));
2094
2095                 }
2096         }
2097         return 0;
2098 }
2099
/* Iterator callbacks for the service listing; installed by ip_vs_info_open(). */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
2106
/*
 * Open handler for the service listing: set up a netns-aware seq_file whose
 * private area is a struct ip_vs_iter.
 */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}
2112
/* File operations for the service listing (seq_file based, per netns). */
static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
2120
2121 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2122 {
2123         struct net *net = seq_file_single_net(seq);
2124         struct ip_vs_stats_user show;
2125
2126 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2127         seq_puts(seq,
2128                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2129         seq_printf(seq,
2130                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2131
2132         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2133         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2134                    show.inpkts, show.outpkts,
2135                    (unsigned long long) show.inbytes,
2136                    (unsigned long long) show.outbytes);
2137
2138 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2139         seq_puts(seq,
2140                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2141         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2142                         show.cps, show.inpps, show.outpps,
2143                         show.inbps, show.outbps);
2144
2145         return 0;
2146 }
2147
/* Open handler for the totals file: single-shot seq_file using ip_vs_stats_show(). */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_show);
}
2152
/* File operations for the totals file (single-shot seq_file, per netns). */
static const struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2160
2161 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2162 {
2163         struct net *net = seq_file_single_net(seq);
2164         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2165         struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2166         struct ip_vs_stats_user rates;
2167         int i;
2168
2169 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2170         seq_puts(seq,
2171                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2172         seq_printf(seq,
2173                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2174
2175         for_each_possible_cpu(i) {
2176                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2177                 unsigned int start;
2178                 __u64 inbytes, outbytes;
2179
2180                 do {
2181                         start = u64_stats_fetch_begin_bh(&u->syncp);
2182                         inbytes = u->ustats.inbytes;
2183                         outbytes = u->ustats.outbytes;
2184                 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2185
2186                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2187                            i, u->ustats.conns, u->ustats.inpkts,
2188                            u->ustats.outpkts, (__u64)inbytes,
2189                            (__u64)outbytes);
2190         }
2191
2192         spin_lock_bh(&tot_stats->lock);
2193
2194         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2195                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2196                    tot_stats->ustats.outpkts,
2197                    (unsigned long long) tot_stats->ustats.inbytes,
2198                    (unsigned long long) tot_stats->ustats.outbytes);
2199
2200         ip_vs_read_estimator(&rates, tot_stats);
2201
2202         spin_unlock_bh(&tot_stats->lock);
2203
2204 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2205         seq_puts(seq,
2206                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2207         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2208                         rates.cps,
2209                         rates.inpps,
2210                         rates.outpps,
2211                         rates.inbps,
2212                         rates.outbps);
2213
2214         return 0;
2215 }
2216
/* Open handler for the per-CPU stats file: single-shot seq_file using ip_vs_stats_percpu_show(). */
static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_percpu_show);
}
2221
/* File operations for the per-CPU stats file (single-shot seq_file, per netns). */
static const struct file_operations ip_vs_stats_percpu_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_percpu_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2229 #endif
2230
2231 /*
2232  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2233  */
2234 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2235 {
2236 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2237         struct ip_vs_proto_data *pd;
2238 #endif
2239
2240         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2241                   u->tcp_timeout,
2242                   u->tcp_fin_timeout,
2243                   u->udp_timeout);
2244
2245 #ifdef CONFIG_IP_VS_PROTO_TCP
2246         if (u->tcp_timeout) {
2247                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2248                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2249                         = u->tcp_timeout * HZ;
2250         }
2251
2252         if (u->tcp_fin_timeout) {
2253                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2254                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2255                         = u->tcp_fin_timeout * HZ;
2256         }
2257 #endif
2258
2259 #ifdef CONFIG_IP_VS_PROTO_UDP
2260         if (u->udp_timeout) {
2261                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2262                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2263                         = u->udp_timeout * HZ;
2264         }
2265 #endif
2266         return 0;
2267 }
2268
2269
/* Index of a set command inside set_arglen[] */
#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
/* Argument sizes of the individual set commands */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Size of the on-stack argument buffer in do_ip_vs_set_ctl() */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN

/*
 * Exact argument length required for each set command, indexed by
 * SET_CMDID(cmd).  do_ip_vs_set_ctl() rejects a call whose len differs.
 * Slots without an initializer are 0.  The element type is unsigned char,
 * so every length listed here has to fit in 8 bits.
 */
static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
};
2291
2292 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2293                                   struct ip_vs_service_user *usvc_compat)
2294 {
2295         memset(usvc, 0, sizeof(*usvc));
2296
2297         usvc->af                = AF_INET;
2298         usvc->protocol          = usvc_compat->protocol;
2299         usvc->addr.ip           = usvc_compat->addr;
2300         usvc->port              = usvc_compat->port;
2301         usvc->fwmark            = usvc_compat->fwmark;
2302
2303         /* Deep copy of sched_name is not needed here */
2304         usvc->sched_name        = usvc_compat->sched_name;
2305
2306         usvc->flags             = usvc_compat->flags;
2307         usvc->timeout           = usvc_compat->timeout;
2308         usvc->netmask           = usvc_compat->netmask;
2309 }
2310
2311 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2312                                    struct ip_vs_dest_user *udest_compat)
2313 {
2314         memset(udest, 0, sizeof(*udest));
2315
2316         udest->addr.ip          = udest_compat->addr;
2317         udest->port             = udest_compat->port;
2318         udest->conn_flags       = udest_compat->conn_flags;
2319         udest->weight           = udest_compat->weight;
2320         udest->u_threshold      = udest_compat->u_threshold;
2321         udest->l_threshold      = udest_compat->l_threshold;
2322 }
2323
2324 static int
2325 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2326 {
2327         struct net *net = sock_net(sk);
2328         int ret;
2329         unsigned char arg[MAX_ARG_LEN];
2330         struct ip_vs_service_user *usvc_compat;
2331         struct ip_vs_service_user_kern usvc;
2332         struct ip_vs_service *svc;
2333         struct ip_vs_dest_user *udest_compat;
2334         struct ip_vs_dest_user_kern udest;
2335         struct netns_ipvs *ipvs = net_ipvs(net);
2336
2337         if (!capable(CAP_NET_ADMIN))
2338                 return -EPERM;
2339
2340         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2341                 return -EINVAL;
2342         if (len < 0 || len >  MAX_ARG_LEN)
2343                 return -EINVAL;
2344         if (len != set_arglen[SET_CMDID(cmd)]) {
2345                 pr_err("set_ctl: len %u != %u\n",
2346                        len, set_arglen[SET_CMDID(cmd)]);
2347                 return -EINVAL;
2348         }
2349
2350         if (copy_from_user(arg, user, len) != 0)
2351                 return -EFAULT;
2352
2353         /* increase the module use count */
2354         ip_vs_use_count_inc();
2355
2356         /* Handle daemons since they have another lock */
2357         if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2358             cmd == IP_VS_SO_SET_STOPDAEMON) {
2359                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2360
2361                 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2362                         ret = -ERESTARTSYS;
2363                         goto out_dec;
2364                 }
2365                 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2366                         ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2367                                                 dm->syncid);
2368                 else
2369                         ret = stop_sync_thread(net, dm->state);
2370                 mutex_unlock(&ipvs->sync_mutex);
2371                 goto out_dec;
2372         }
2373
2374         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2375                 ret = -ERESTARTSYS;
2376                 goto out_dec;
2377         }
2378
2379         if (cmd == IP_VS_SO_SET_FLUSH) {
2380                 /* Flush the virtual service */
2381                 ret = ip_vs_flush(net);
2382                 goto out_unlock;
2383         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2384                 /* Set timeout values for (tcp tcpfin udp) */
2385                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2386                 goto out_unlock;
2387         }
2388
2389         usvc_compat = (struct ip_vs_service_user *)arg;
2390         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2391
2392         /* We only use the new structs internally, so copy userspace compat
2393          * structs to extended internal versions */
2394         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2395         ip_vs_copy_udest_compat(&udest, udest_compat);
2396
2397         if (cmd == IP_VS_SO_SET_ZERO) {
2398                 /* if no service address is set, zero counters in all */
2399                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2400                         ret = ip_vs_zero_all(net);
2401                         goto out_unlock;
2402                 }
2403         }
2404
2405         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2406         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2407             usvc.protocol != IPPROTO_SCTP) {
2408                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2409                        usvc.protocol, &usvc.addr.ip,
2410                        ntohs(usvc.port), usvc.sched_name);
2411                 ret = -EFAULT;
2412                 goto out_unlock;
2413         }
2414
2415         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2416         if (usvc.fwmark == 0)
2417                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2418                                            &usvc.addr, usvc.port);
2419         else
2420                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2421
2422         if (cmd != IP_VS_SO_SET_ADD
2423             && (svc == NULL || svc->protocol != usvc.protocol)) {
2424                 ret = -ESRCH;
2425                 goto out_unlock;
2426         }
2427
2428         switch (cmd) {
2429         case IP_VS_SO_SET_ADD:
2430                 if (svc != NULL)
2431                         ret = -EEXIST;
2432                 else
2433                         ret = ip_vs_add_service(net, &usvc, &svc);
2434                 break;
2435         case IP_VS_SO_SET_EDIT:
2436                 ret = ip_vs_edit_service(svc, &usvc);
2437                 break;
2438         case IP_VS_SO_SET_DEL:
2439                 ret = ip_vs_del_service(svc);
2440                 if (!ret)
2441                         goto out_unlock;
2442                 break;
2443         case IP_VS_SO_SET_ZERO:
2444                 ret = ip_vs_zero_service(svc);
2445                 break;
2446         case IP_VS_SO_SET_ADDDEST:
2447                 ret = ip_vs_add_dest(svc, &udest);
2448                 break;
2449         case IP_VS_SO_SET_EDITDEST:
2450                 ret = ip_vs_edit_dest(svc, &udest);
2451                 break;
2452         case IP_VS_SO_SET_DELDEST:
2453                 ret = ip_vs_del_dest(svc, &udest);
2454                 break;
2455         default:
2456                 ret = -EINVAL;
2457         }
2458
2459   out_unlock:
2460         mutex_unlock(&__ip_vs_mutex);
2461   out_dec:
2462         /* decrease the module use count */
2463         ip_vs_use_count_dec();
2464
2465         return ret;
2466 }
2467
2468
2469 static void
2470 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2471 {
2472         dst->protocol = src->protocol;
2473         dst->addr = src->addr.ip;
2474         dst->port = src->port;
2475         dst->fwmark = src->fwmark;
2476         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2477         dst->flags = src->flags;
2478         dst->timeout = src->timeout / HZ;
2479         dst->netmask = src->netmask;
2480         dst->num_dests = src->num_dests;
2481         ip_vs_copy_stats(&dst->stats, &src->stats);
2482 }
2483
2484 static inline int
2485 __ip_vs_get_service_entries(struct net *net,
2486                             const struct ip_vs_get_services *get,
2487                             struct ip_vs_get_services __user *uptr)
2488 {
2489         int idx, count=0;
2490         struct ip_vs_service *svc;
2491         struct ip_vs_service_entry entry;
2492         int ret = 0;
2493
2494         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2495                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2496                         /* Only expose IPv4 entries to old interface */
2497                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2498                                 continue;
2499
2500                         if (count >= get->num_services)
2501                                 goto out;
2502                         memset(&entry, 0, sizeof(entry));
2503                         ip_vs_copy_service(&entry, svc);
2504                         if (copy_to_user(&uptr->entrytable[count],
2505                                          &entry, sizeof(entry))) {
2506                                 ret = -EFAULT;
2507                                 goto out;
2508                         }
2509                         count++;
2510                 }
2511         }
2512
2513         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2514                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2515                         /* Only expose IPv4 entries to old interface */
2516                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2517                                 continue;
2518
2519                         if (count >= get->num_services)
2520                                 goto out;
2521                         memset(&entry, 0, sizeof(entry));
2522                         ip_vs_copy_service(&entry, svc);
2523                         if (copy_to_user(&uptr->entrytable[count],
2524                                          &entry, sizeof(entry))) {
2525                                 ret = -EFAULT;
2526                                 goto out;
2527                         }
2528                         count++;
2529                 }
2530         }
2531 out:
2532         return ret;
2533 }
2534
2535 static inline int
2536 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2537                          struct ip_vs_get_dests __user *uptr)
2538 {
2539         struct ip_vs_service *svc;
2540         union nf_inet_addr addr = { .ip = get->addr };
2541         int ret = 0;
2542
2543         if (get->fwmark)
2544                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2545         else
2546                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2547                                            get->port);
2548
2549         if (svc) {
2550                 int count = 0;
2551                 struct ip_vs_dest *dest;
2552                 struct ip_vs_dest_entry entry;
2553
2554                 list_for_each_entry(dest, &svc->destinations, n_list) {
2555                         if (count >= get->num_dests)
2556                                 break;
2557
2558                         entry.addr = dest->addr.ip;
2559                         entry.port = dest->port;
2560                         entry.conn_flags = atomic_read(&dest->conn_flags);
2561                         entry.weight = atomic_read(&dest->weight);
2562                         entry.u_threshold = dest->u_threshold;
2563                         entry.l_threshold = dest->l_threshold;
2564                         entry.activeconns = atomic_read(&dest->activeconns);
2565                         entry.inactconns = atomic_read(&dest->inactconns);
2566                         entry.persistconns = atomic_read(&dest->persistconns);
2567                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2568                         if (copy_to_user(&uptr->entrytable[count],
2569                                          &entry, sizeof(entry))) {
2570                                 ret = -EFAULT;
2571                                 break;
2572                         }
2573                         count++;
2574                 }
2575         } else
2576                 ret = -ESRCH;
2577         return ret;
2578 }
2579
2580 static inline void
2581 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2582 {
2583 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2584         struct ip_vs_proto_data *pd;
2585 #endif
2586
2587 #ifdef CONFIG_IP_VS_PROTO_TCP
2588         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2589         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2590         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2591 #endif
2592 #ifdef CONFIG_IP_VS_PROTO_UDP
2593         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2594         u->udp_timeout =
2595                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2596 #endif
2597 }
2598
2599
2600 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2601 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2602 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2603 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2604 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2605 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2606 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2607
2608 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2609         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2610         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2611         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2612         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2613         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2614         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2615         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2616 };
2617
2618 static int
2619 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2620 {
2621         unsigned char arg[128];
2622         int ret = 0;
2623         unsigned int copylen;
2624         struct net *net = sock_net(sk);
2625         struct netns_ipvs *ipvs = net_ipvs(net);
2626
2627         BUG_ON(!net);
2628         if (!capable(CAP_NET_ADMIN))
2629                 return -EPERM;
2630
2631         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2632                 return -EINVAL;
2633
2634         if (*len < get_arglen[GET_CMDID(cmd)]) {
2635                 pr_err("get_ctl: len %u < %u\n",
2636                        *len, get_arglen[GET_CMDID(cmd)]);
2637                 return -EINVAL;
2638         }
2639
2640         copylen = get_arglen[GET_CMDID(cmd)];
2641         if (copylen > 128)
2642                 return -EINVAL;
2643
2644         if (copy_from_user(arg, user, copylen) != 0)
2645                 return -EFAULT;
2646         /*
2647          * Handle daemons first since it has its own locking
2648          */
2649         if (cmd == IP_VS_SO_GET_DAEMON) {
2650                 struct ip_vs_daemon_user d[2];
2651
2652                 memset(&d, 0, sizeof(d));
2653                 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2654                         return -ERESTARTSYS;
2655
2656                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2657                         d[0].state = IP_VS_STATE_MASTER;
2658                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2659                                 sizeof(d[0].mcast_ifn));
2660                         d[0].syncid = ipvs->master_syncid;
2661                 }
2662                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2663                         d[1].state = IP_VS_STATE_BACKUP;
2664                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2665                                 sizeof(d[1].mcast_ifn));
2666                         d[1].syncid = ipvs->backup_syncid;
2667                 }
2668                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2669                         ret = -EFAULT;
2670                 mutex_unlock(&ipvs->sync_mutex);
2671                 return ret;
2672         }
2673
2674         if (mutex_lock_interruptible(&__ip_vs_mutex))
2675                 return -ERESTARTSYS;
2676
2677         switch (cmd) {
2678         case IP_VS_SO_GET_VERSION:
2679         {
2680                 char buf[64];
2681
2682                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2683                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2684                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2685                         ret = -EFAULT;
2686                         goto out;
2687                 }
2688                 *len = strlen(buf)+1;
2689         }
2690         break;
2691
2692         case IP_VS_SO_GET_INFO:
2693         {
2694                 struct ip_vs_getinfo info;
2695                 info.version = IP_VS_VERSION_CODE;
2696                 info.size = ip_vs_conn_tab_size;
2697                 info.num_services = ipvs->num_services;
2698                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2699                         ret = -EFAULT;
2700         }
2701         break;
2702
2703         case IP_VS_SO_GET_SERVICES:
2704         {
2705                 struct ip_vs_get_services *get;
2706                 int size;
2707
2708                 get = (struct ip_vs_get_services *)arg;
2709                 size = sizeof(*get) +
2710                         sizeof(struct ip_vs_service_entry) * get->num_services;
2711                 if (*len != size) {
2712                         pr_err("length: %u != %u\n", *len, size);
2713                         ret = -EINVAL;
2714                         goto out;
2715                 }
2716                 ret = __ip_vs_get_service_entries(net, get, user);
2717         }
2718         break;
2719
2720         case IP_VS_SO_GET_SERVICE:
2721         {
2722                 struct ip_vs_service_entry *entry;
2723                 struct ip_vs_service *svc;
2724                 union nf_inet_addr addr;
2725
2726                 entry = (struct ip_vs_service_entry *)arg;
2727                 addr.ip = entry->addr;
2728                 if (entry->fwmark)
2729                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2730                 else
2731                         svc = __ip_vs_service_find(net, AF_INET,
2732                                                    entry->protocol, &addr,
2733                                                    entry->port);
2734                 if (svc) {
2735                         ip_vs_copy_service(entry, svc);
2736                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2737                                 ret = -EFAULT;
2738                 } else
2739                         ret = -ESRCH;
2740         }
2741         break;
2742
2743         case IP_VS_SO_GET_DESTS:
2744         {
2745                 struct ip_vs_get_dests *get;
2746                 int size;
2747
2748                 get = (struct ip_vs_get_dests *)arg;
2749                 size = sizeof(*get) +
2750                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2751                 if (*len != size) {
2752                         pr_err("length: %u != %u\n", *len, size);
2753                         ret = -EINVAL;
2754                         goto out;
2755                 }
2756                 ret = __ip_vs_get_dest_entries(net, get, user);
2757         }
2758         break;
2759
2760         case IP_VS_SO_GET_TIMEOUT:
2761         {
2762                 struct ip_vs_timeout_user t;
2763
2764                 memset(&t, 0, sizeof(t));
2765                 __ip_vs_get_timeouts(net, &t);
2766                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2767                         ret = -EFAULT;
2768         }
2769         break;
2770
2771         default:
2772                 ret = -EINVAL;
2773         }
2774
2775 out:
2776         mutex_unlock(&__ip_vs_mutex);
2777         return ret;
2778 }
2779
2780
2781 static struct nf_sockopt_ops ip_vs_sockopts = {
2782         .pf             = PF_INET,
2783         .set_optmin     = IP_VS_BASE_CTL,
2784         .set_optmax     = IP_VS_SO_SET_MAX+1,
2785         .set            = do_ip_vs_set_ctl,
2786         .get_optmin     = IP_VS_BASE_CTL,
2787         .get_optmax     = IP_VS_SO_GET_MAX+1,
2788         .get            = do_ip_vs_get_ctl,
2789         .owner          = THIS_MODULE,
2790 };
2791
2792 /*
2793  * Generic Netlink interface
2794  */
2795
2796 /* IPVS genetlink family */
2797 static struct genl_family ip_vs_genl_family = {
2798         .id             = GENL_ID_GENERATE,
2799         .hdrsize        = 0,
2800         .name           = IPVS_GENL_NAME,
2801         .version        = IPVS_GENL_VERSION,
2802         .maxattr        = IPVS_CMD_MAX,
2803         .netnsok        = true,         /* Make ipvsadm to work on netns */
2804 };
2805
2806 /* Policy used for first-level command attributes */
2807 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2808         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2809         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2810         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2811         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2812         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2813         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2814 };
2815
2816 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2817 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2818         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2819         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2820                                             .len = IP_VS_IFNAME_MAXLEN },
2821         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2822 };
2823
2824 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2825 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2826         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2827         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2828         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2829                                             .len = sizeof(union nf_inet_addr) },
2830         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2831         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2832         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2833                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2834         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2835                                             .len = IP_VS_PENAME_MAXLEN },
2836         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2837                                             .len = sizeof(struct ip_vs_flags) },
2838         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2839         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2840         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2841 };
2842
2843 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2844 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2845         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2846                                             .len = sizeof(union nf_inet_addr) },
2847         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2848         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2849         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2850         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2851         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2852         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2853         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2854         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2855         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2856 };
2857
2858 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2859                                  struct ip_vs_stats *stats)
2860 {
2861         struct ip_vs_stats_user ustats;
2862         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2863         if (!nl_stats)
2864                 return -EMSGSIZE;
2865
2866         ip_vs_copy_stats(&ustats, stats);
2867
2868         if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2869             nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2870             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2871             nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2872             nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2873             nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2874             nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2875             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2876             nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2877             nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2878                 goto nla_put_failure;
2879         nla_nest_end(skb, nl_stats);
2880
2881         return 0;
2882
2883 nla_put_failure:
2884         nla_nest_cancel(skb, nl_stats);
2885         return -EMSGSIZE;
2886 }
2887
2888 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2889                                    struct ip_vs_service *svc)
2890 {
2891         struct nlattr *nl_service;
2892         struct ip_vs_flags flags = { .flags = svc->flags,
2893                                      .mask = ~0 };
2894
2895         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2896         if (!nl_service)
2897                 return -EMSGSIZE;
2898
2899         if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2900                 goto nla_put_failure;
2901         if (svc->fwmark) {
2902                 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2903                         goto nla_put_failure;
2904         } else {
2905                 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2906                     nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2907                     nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2908                         goto nla_put_failure;
2909         }
2910
2911         if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2912             (svc->pe &&
2913              nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2914             nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2915             nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2916             nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2917                 goto nla_put_failure;
2918         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2919                 goto nla_put_failure;
2920
2921         nla_nest_end(skb, nl_service);
2922
2923         return 0;
2924
2925 nla_put_failure:
2926         nla_nest_cancel(skb, nl_service);
2927         return -EMSGSIZE;
2928 }
2929
2930 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2931                                    struct ip_vs_service *svc,
2932                                    struct netlink_callback *cb)
2933 {
2934         void *hdr;
2935
2936         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2937                           &ip_vs_genl_family, NLM_F_MULTI,
2938                           IPVS_CMD_NEW_SERVICE);
2939         if (!hdr)
2940                 return -EMSGSIZE;
2941
2942         if (ip_vs_genl_fill_service(skb, svc) < 0)
2943                 goto nla_put_failure;
2944
2945         return genlmsg_end(skb, hdr);
2946
2947 nla_put_failure:
2948         genlmsg_cancel(skb, hdr);
2949         return -EMSGSIZE;
2950 }
2951
2952 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2953                                     struct netlink_callback *cb)
2954 {
2955         int idx = 0, i;
2956         int start = cb->args[0];
2957         struct ip_vs_service *svc;
2958         struct net *net = skb_sknet(skb);
2959
2960         mutex_lock(&__ip_vs_mutex);
2961         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2962                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2963                         if (++idx <= start || !net_eq(svc->net, net))
2964                                 continue;
2965                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2966                                 idx--;
2967                                 goto nla_put_failure;
2968                         }
2969                 }
2970         }
2971
2972         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2973                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2974                         if (++idx <= start || !net_eq(svc->net, net))
2975                                 continue;
2976                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2977                                 idx--;
2978                                 goto nla_put_failure;
2979                         }
2980                 }
2981         }
2982
2983 nla_put_failure:
2984         mutex_unlock(&__ip_vs_mutex);
2985         cb->args[0] = idx;
2986
2987         return skb->len;
2988 }
2989
2990 static int ip_vs_genl_parse_service(struct net *net,
2991                                     struct ip_vs_service_user_kern *usvc,
2992                                     struct nlattr *nla, int full_entry,
2993                                     struct ip_vs_service **ret_svc)
2994 {
2995         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2996         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2997         struct ip_vs_service *svc;
2998
2999         /* Parse mandatory identifying service fields first */
3000         if (nla == NULL ||
3001             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3002                 return -EINVAL;
3003
3004         nla_af          = attrs[IPVS_SVC_ATTR_AF];
3005         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3006         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3007         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3008         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3009
3010         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3011                 return -EINVAL;
3012
3013         memset(usvc, 0, sizeof(*usvc));
3014
3015         usvc->af = nla_get_u16(nla_af);
3016 #ifdef CONFIG_IP_VS_IPV6
3017         if (usvc->af != AF_INET && usvc->af != AF_INET6)
3018 #else
3019         if (usvc->af != AF_INET)
3020 #endif
3021                 return -EAFNOSUPPORT;
3022
3023         if (nla_fwmark) {
3024                 usvc->protocol = IPPROTO_TCP;
3025                 usvc->fwmark = nla_get_u32(nla_fwmark);
3026         } else {
3027                 usvc->protocol = nla_get_u16(nla_protocol);
3028                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3029                 usvc->port = nla_get_u16(nla_port);
3030                 usvc->fwmark = 0;
3031         }
3032
3033         if (usvc->fwmark)
3034                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3035         else
3036                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3037                                            &usvc->addr, usvc->port);
3038         *ret_svc = svc;
3039
3040         /* If a full entry was requested, check for the additional fields */
3041         if (full_entry) {
3042                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3043                               *nla_netmask;
3044                 struct ip_vs_flags flags;
3045
3046                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3047                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3048                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3049                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3050                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3051
3052                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3053                         return -EINVAL;
3054
3055                 nla_memcpy(&flags, nla_flags, sizeof(flags));
3056
3057                 /* prefill flags from service if it already exists */
3058                 if (svc)
3059                         usvc->flags = svc->flags;
3060
3061                 /* set new flags from userland */
3062                 usvc->flags = (usvc->flags & ~flags.mask) |
3063                               (flags.flags & flags.mask);
3064                 usvc->sched_name = nla_data(nla_sched);
3065                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3066                 usvc->timeout = nla_get_u32(nla_timeout);
3067                 usvc->netmask = nla_get_u32(nla_netmask);
3068         }
3069
3070         return 0;
3071 }
3072
3073 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3074                                                      struct nlattr *nla)
3075 {
3076         struct ip_vs_service_user_kern usvc;
3077         struct ip_vs_service *svc;
3078         int ret;
3079
3080         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3081         return ret ? ERR_PTR(ret) : svc;
3082 }
3083
3084 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3085 {
3086         struct nlattr *nl_dest;
3087
3088         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3089         if (!nl_dest)
3090                 return -EMSGSIZE;
3091
3092         if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3093             nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3094             nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3095                         (atomic_read(&dest->conn_flags) &
3096                          IP_VS_CONN_F_FWD_MASK)) ||
3097             nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3098                         atomic_read(&dest->weight)) ||
3099             nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3100             nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3101             nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3102                         atomic_read(&dest->activeconns)) ||
3103             nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3104                         atomic_read(&dest->inactconns)) ||
3105             nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3106                         atomic_read(&dest->persistconns)))
3107                 goto nla_put_failure;
3108         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3109                 goto nla_put_failure;
3110
3111         nla_nest_end(skb, nl_dest);
3112
3113         return 0;
3114
3115 nla_put_failure:
3116         nla_nest_cancel(skb, nl_dest);
3117         return -EMSGSIZE;
3118 }
3119
3120 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3121                                 struct netlink_callback *cb)
3122 {
3123         void *hdr;
3124
3125         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3126                           &ip_vs_genl_family, NLM_F_MULTI,
3127                           IPVS_CMD_NEW_DEST);
3128         if (!hdr)
3129                 return -EMSGSIZE;
3130
3131         if (ip_vs_genl_fill_dest(skb, dest) < 0)
3132                 goto nla_put_failure;
3133
3134         return genlmsg_end(skb, hdr);
3135
3136 nla_put_failure:
3137         genlmsg_cancel(skb, hdr);
3138         return -EMSGSIZE;
3139 }
3140
3141 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3142                                  struct netlink_callback *cb)
3143 {
3144         int idx = 0;
3145         int start = cb->args[0];
3146         struct ip_vs_service *svc;
3147         struct ip_vs_dest *dest;
3148         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3149         struct net *net = skb_sknet(skb);
3150
3151         mutex_lock(&__ip_vs_mutex);
3152
3153         /* Try to find the service for which to dump destinations */
3154         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3155                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3156                 goto out_err;
3157
3158
3159         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3160         if (IS_ERR(svc) || svc == NULL)
3161                 goto out_err;
3162
3163         /* Dump the destinations */
3164         list_for_each_entry(dest, &svc->destinations, n_list) {
3165                 if (++idx <= start)
3166                         continue;
3167                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3168                         idx--;
3169                         goto nla_put_failure;
3170                 }
3171         }
3172
3173 nla_put_failure:
3174         cb->args[0] = idx;
3175
3176 out_err:
3177         mutex_unlock(&__ip_vs_mutex);
3178
3179         return skb->len;
3180 }
3181
3182 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3183                                  struct nlattr *nla, int full_entry)
3184 {
3185         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3186         struct nlattr *nla_addr, *nla_port;
3187
3188         /* Parse mandatory identifying destination fields first */
3189         if (nla == NULL ||
3190             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3191                 return -EINVAL;
3192
3193         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3194         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3195
3196         if (!(nla_addr && nla_port))
3197                 return -EINVAL;
3198
3199         memset(udest, 0, sizeof(*udest));
3200
3201         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3202         udest->port = nla_get_u16(nla_port);
3203
3204         /* If a full entry was requested, check for the additional fields */
3205         if (full_entry) {
3206                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3207                               *nla_l_thresh;
3208
3209                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3210                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3211                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3212                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3213
3214                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3215                         return -EINVAL;
3216
3217                 udest->conn_flags = nla_get_u32(nla_fwd)
3218                                     & IP_VS_CONN_F_FWD_MASK;
3219                 udest->weight = nla_get_u32(nla_weight);
3220                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3221                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3222         }
3223
3224         return 0;
3225 }
3226
3227 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3228                                   const char *mcast_ifn, __be32 syncid)
3229 {
3230         struct nlattr *nl_daemon;
3231
3232         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3233         if (!nl_daemon)
3234                 return -EMSGSIZE;
3235
3236         if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3237             nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3238             nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3239                 goto nla_put_failure;
3240         nla_nest_end(skb, nl_daemon);
3241
3242         return 0;
3243
3244 nla_put_failure:
3245         nla_nest_cancel(skb, nl_daemon);
3246         return -EMSGSIZE;
3247 }
3248
3249 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3250                                   const char *mcast_ifn, __be32 syncid,
3251                                   struct netlink_callback *cb)
3252 {
3253         void *hdr;
3254         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3255                           &ip_vs_genl_family, NLM_F_MULTI,
3256                           IPVS_CMD_NEW_DAEMON);
3257         if (!hdr)
3258                 return -EMSGSIZE;
3259
3260         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3261                 goto nla_put_failure;
3262
3263         return genlmsg_end(skb, hdr);
3264
3265 nla_put_failure:
3266         genlmsg_cancel(skb, hdr);
3267         return -EMSGSIZE;
3268 }
3269
3270 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3271                                    struct netlink_callback *cb)
3272 {
3273         struct net *net = skb_sknet(skb);
3274         struct netns_ipvs *ipvs = net_ipvs(net);
3275
3276         mutex_lock(&ipvs->sync_mutex);
3277         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3278                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3279                                            ipvs->master_mcast_ifn,
3280                                            ipvs->master_syncid, cb) < 0)
3281                         goto nla_put_failure;
3282
3283                 cb->args[0] = 1;
3284         }
3285
3286         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3287                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3288                                            ipvs->backup_mcast_ifn,
3289                                            ipvs->backup_syncid, cb) < 0)
3290                         goto nla_put_failure;
3291
3292                 cb->args[1] = 1;
3293         }
3294
3295 nla_put_failure:
3296         mutex_unlock(&ipvs->sync_mutex);
3297
3298         return skb->len;
3299 }
3300
3301 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3302 {
3303         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3304               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3305               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3306                 return -EINVAL;
3307
3308         return start_sync_thread(net,
3309                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3310                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3311                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3312 }
3313
3314 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3315 {
3316         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3317                 return -EINVAL;
3318
3319         return stop_sync_thread(net,
3320                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3321 }
3322
3323 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3324 {
3325         struct ip_vs_timeout_user t;
3326
3327         __ip_vs_get_timeouts(net, &t);
3328
3329         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3330                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3331
3332         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3333                 t.tcp_fin_timeout =
3334                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3335
3336         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3337                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3338
3339         return ip_vs_set_timeout(net, &t);
3340 }
3341
3342 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3343 {
3344         int ret = 0, cmd;
3345         struct net *net;
3346         struct netns_ipvs *ipvs;
3347
3348         net = skb_sknet(skb);
3349         ipvs = net_ipvs(net);
3350         cmd = info->genlhdr->cmd;
3351
3352         if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3353                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3354
3355                 mutex_lock(&ipvs->sync_mutex);
3356                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3357                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3358                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3359                                      ip_vs_daemon_policy)) {
3360                         ret = -EINVAL;
3361                         goto out;
3362                 }
3363
3364                 if (cmd == IPVS_CMD_NEW_DAEMON)
3365                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3366                 else
3367                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3368 out:
3369                 mutex_unlock(&ipvs->sync_mutex);
3370         }
3371         return ret;
3372 }
3373
3374 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3375 {
3376         struct ip_vs_service *svc = NULL;
3377         struct ip_vs_service_user_kern usvc;
3378         struct ip_vs_dest_user_kern udest;
3379         int ret = 0, cmd;
3380         int need_full_svc = 0, need_full_dest = 0;
3381         struct net *net;
3382
3383         net = skb_sknet(skb);
3384         cmd = info->genlhdr->cmd;
3385
3386         mutex_lock(&__ip_vs_mutex);
3387
3388         if (cmd == IPVS_CMD_FLUSH) {
3389                 ret = ip_vs_flush(net);
3390                 goto out;
3391         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3392                 ret = ip_vs_genl_set_config(net, info->attrs);
3393                 goto out;
3394         } else if (cmd == IPVS_CMD_ZERO &&
3395                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3396                 ret = ip_vs_zero_all(net);
3397                 goto out;
3398         }
3399
3400         /* All following commands require a service argument, so check if we
3401          * received a valid one. We need a full service specification when
3402          * adding / editing a service. Only identifying members otherwise. */
3403         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3404                 need_full_svc = 1;
3405
3406         ret = ip_vs_genl_parse_service(net, &usvc,
3407                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3408                                        need_full_svc, &svc);
3409         if (ret)
3410                 goto out;
3411
3412         /* Unless we're adding a new service, the service must already exist */
3413         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3414                 ret = -ESRCH;
3415                 goto out;
3416         }
3417
3418         /* Destination commands require a valid destination argument. For
3419          * adding / editing a destination, we need a full destination
3420          * specification. */
3421         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3422             cmd == IPVS_CMD_DEL_DEST) {
3423                 if (cmd != IPVS_CMD_DEL_DEST)
3424                         need_full_dest = 1;
3425
3426                 ret = ip_vs_genl_parse_dest(&udest,
3427                                             info->attrs[IPVS_CMD_ATTR_DEST],
3428                                             need_full_dest);
3429                 if (ret)
3430                         goto out;
3431         }
3432
3433         switch (cmd) {
3434         case IPVS_CMD_NEW_SERVICE:
3435                 if (svc == NULL)
3436                         ret = ip_vs_add_service(net, &usvc, &svc);
3437                 else
3438                         ret = -EEXIST;
3439                 break;
3440         case IPVS_CMD_SET_SERVICE:
3441                 ret = ip_vs_edit_service(svc, &usvc);
3442                 break;
3443         case IPVS_CMD_DEL_SERVICE:
3444                 ret = ip_vs_del_service(svc);
3445                 /* do not use svc, it can be freed */
3446                 break;
3447         case IPVS_CMD_NEW_DEST:
3448                 ret = ip_vs_add_dest(svc, &udest);
3449                 break;
3450         case IPVS_CMD_SET_DEST:
3451                 ret = ip_vs_edit_dest(svc, &udest);
3452                 break;
3453         case IPVS_CMD_DEL_DEST:
3454                 ret = ip_vs_del_dest(svc, &udest);
3455                 break;
3456         case IPVS_CMD_ZERO:
3457                 ret = ip_vs_zero_service(svc);
3458                 break;
3459         default:
3460                 ret = -EINVAL;
3461         }
3462
3463 out:
3464         mutex_unlock(&__ip_vs_mutex);
3465
3466         return ret;
3467 }
3468
3469 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3470 {
3471         struct sk_buff *msg;
3472         void *reply;
3473         int ret, cmd, reply_cmd;
3474         struct net *net;
3475
3476         net = skb_sknet(skb);
3477         cmd = info->genlhdr->cmd;
3478
3479         if (cmd == IPVS_CMD_GET_SERVICE)
3480                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3481         else if (cmd == IPVS_CMD_GET_INFO)
3482                 reply_cmd = IPVS_CMD_SET_INFO;
3483         else if (cmd == IPVS_CMD_GET_CONFIG)
3484                 reply_cmd = IPVS_CMD_SET_CONFIG;
3485         else {
3486                 pr_err("unknown Generic Netlink command\n");
3487                 return -EINVAL;
3488         }
3489
3490         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3491         if (!msg)
3492                 return -ENOMEM;
3493
3494         mutex_lock(&__ip_vs_mutex);
3495
3496         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3497         if (reply == NULL)
3498                 goto nla_put_failure;
3499
3500         switch (cmd) {
3501         case IPVS_CMD_GET_SERVICE:
3502         {
3503                 struct ip_vs_service *svc;
3504
3505                 svc = ip_vs_genl_find_service(net,
3506                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3507                 if (IS_ERR(svc)) {
3508                         ret = PTR_ERR(svc);
3509                         goto out_err;
3510                 } else if (svc) {
3511                         ret = ip_vs_genl_fill_service(msg, svc);
3512                         if (ret)
3513                                 goto nla_put_failure;
3514                 } else {
3515                         ret = -ESRCH;
3516                         goto out_err;
3517                 }
3518
3519                 break;
3520         }
3521
3522         case IPVS_CMD_GET_CONFIG:
3523         {
3524                 struct ip_vs_timeout_user t;
3525
3526                 __ip_vs_get_timeouts(net, &t);
3527 #ifdef CONFIG_IP_VS_PROTO_TCP
3528                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3529                                 t.tcp_timeout) ||
3530                     nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3531                                 t.tcp_fin_timeout))
3532                         goto nla_put_failure;
3533 #endif
3534 #ifdef CONFIG_IP_VS_PROTO_UDP
3535                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3536                         goto nla_put_failure;
3537 #endif
3538
3539                 break;
3540         }
3541
3542         case IPVS_CMD_GET_INFO:
3543                 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3544                                 IP_VS_VERSION_CODE) ||
3545                     nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3546                                 ip_vs_conn_tab_size))
3547                         goto nla_put_failure;
3548                 break;
3549         }
3550
3551         genlmsg_end(msg, reply);
3552         ret = genlmsg_reply(msg, info);
3553         goto out;
3554
3555 nla_put_failure:
3556         pr_err("not enough space in Netlink message\n");
3557         ret = -EMSGSIZE;
3558
3559 out_err:
3560         nlmsg_free(msg);
3561 out:
3562         mutex_unlock(&__ip_vs_mutex);
3563
3564         return ret;
3565 }
3566
3567
3568 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3569         {
3570                 .cmd    = IPVS_CMD_NEW_SERVICE,
3571                 .flags  = GENL_ADMIN_PERM,
3572                 .policy = ip_vs_cmd_policy,
3573                 .doit   = ip_vs_genl_set_cmd,
3574         },
3575         {
3576                 .cmd    = IPVS_CMD_SET_SERVICE,
3577                 .flags  = GENL_ADMIN_PERM,
3578                 .policy = ip_vs_cmd_policy,
3579                 .doit   = ip_vs_genl_set_cmd,
3580         },
3581         {
3582                 .cmd    = IPVS_CMD_DEL_SERVICE,
3583                 .flags  = GENL_ADMIN_PERM,
3584                 .policy = ip_vs_cmd_policy,
3585                 .doit   = ip_vs_genl_set_cmd,
3586         },
3587         {
3588                 .cmd    = IPVS_CMD_GET_SERVICE,
3589                 .flags  = GENL_ADMIN_PERM,
3590                 .doit   = ip_vs_genl_get_cmd,
3591                 .dumpit = ip_vs_genl_dump_services,
3592                 .policy = ip_vs_cmd_policy,
3593         },
3594         {
3595                 .cmd    = IPVS_CMD_NEW_DEST,
3596                 .flags  = GENL_ADMIN_PERM,
3597                 .policy = ip_vs_cmd_policy,
3598                 .doit   = ip_vs_genl_set_cmd,
3599         },
3600         {
3601                 .cmd    = IPVS_CMD_SET_DEST,
3602                 .flags  = GENL_ADMIN_PERM,
3603                 .policy = ip_vs_cmd_policy,
3604                 .doit   = ip_vs_genl_set_cmd,
3605         },
3606         {
3607                 .cmd    = IPVS_CMD_DEL_DEST,
3608                 .flags  = GENL_ADMIN_PERM,
3609                 .policy = ip_vs_cmd_policy,
3610                 .doit   = ip_vs_genl_set_cmd,
3611         },
3612         {
3613                 .cmd    = IPVS_CMD_GET_DEST,
3614                 .flags  = GENL_ADMIN_PERM,
3615                 .policy = ip_vs_cmd_policy,
3616                 .dumpit = ip_vs_genl_dump_dests,
3617         },
3618         {
3619                 .cmd    = IPVS_CMD_NEW_DAEMON,
3620                 .flags  = GENL_ADMIN_PERM,
3621                 .policy = ip_vs_cmd_policy,
3622                 .doit   = ip_vs_genl_set_daemon,
3623         },
3624         {
3625                 .cmd    = IPVS_CMD_DEL_DAEMON,
3626                 .flags  = GENL_ADMIN_PERM,
3627                 .policy = ip_vs_cmd_policy,
3628                 .doit   = ip_vs_genl_set_daemon,
3629         },
3630         {
3631                 .cmd    = IPVS_CMD_GET_DAEMON,
3632                 .flags  = GENL_ADMIN_PERM,
3633                 .dumpit = ip_vs_genl_dump_daemons,
3634         },
3635         {
3636                 .cmd    = IPVS_CMD_SET_CONFIG,
3637                 .flags  = GENL_ADMIN_PERM,
3638                 .policy = ip_vs_cmd_policy,
3639                 .doit   = ip_vs_genl_set_cmd,
3640         },
3641         {
3642                 .cmd    = IPVS_CMD_GET_CONFIG,
3643                 .flags  = GENL_ADMIN_PERM,
3644                 .doit   = ip_vs_genl_get_cmd,
3645         },
3646         {
3647                 .cmd    = IPVS_CMD_GET_INFO,
3648                 .flags  = GENL_ADMIN_PERM,
3649                 .doit   = ip_vs_genl_get_cmd,
3650         },
3651         {
3652                 .cmd    = IPVS_CMD_ZERO,
3653                 .flags  = GENL_ADMIN_PERM,
3654                 .policy = ip_vs_cmd_policy,
3655                 .doit   = ip_vs_genl_set_cmd,
3656         },
3657         {
3658                 .cmd    = IPVS_CMD_FLUSH,
3659                 .flags  = GENL_ADMIN_PERM,
3660                 .doit   = ip_vs_genl_set_cmd,
3661         },
3662 };
3663
3664 static int __init ip_vs_genl_register(void)
3665 {
3666         return genl_register_family_with_ops(&ip_vs_genl_family,
3667                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3668 }
3669
3670 static void ip_vs_genl_unregister(void)
3671 {
3672         genl_unregister_family(&ip_vs_genl_family);
3673 }
3674
3675 /* End of Generic Netlink interface definitions */
3676
/*
 * per netns init/exit func.
 */
3680 #ifdef CONFIG_SYSCTL
3681 int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3682 {
3683         int idx;
3684         struct netns_ipvs *ipvs = net_ipvs(net);
3685         struct ctl_table *tbl;
3686
3687         atomic_set(&ipvs->dropentry, 0);
3688         spin_lock_init(&ipvs->dropentry_lock);
3689         spin_lock_init(&ipvs->droppacket_lock);
3690         spin_lock_init(&ipvs->securetcp_lock);
3691
3692         if (!net_eq(net, &init_net)) {
3693                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3694                 if (tbl == NULL)
3695                         return -ENOMEM;
3696         } else
3697                 tbl = vs_vars;
3698         /* Initialize sysctl defaults */
3699         idx = 0;
3700         ipvs->sysctl_amemthresh = 1024;
3701         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3702         ipvs->sysctl_am_droprate = 10;
3703         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3704         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3705         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3706 #ifdef CONFIG_IP_VS_NFCT
3707         tbl[idx++].data = &ipvs->sysctl_conntrack;
3708 #endif
3709         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3710         ipvs->sysctl_snat_reroute = 1;
3711         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3712         ipvs->sysctl_sync_ver = 1;
3713         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3714         ipvs->sysctl_sync_ports = 1;
3715         tbl[idx++].data = &ipvs->sysctl_sync_ports;
3716         ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3717         tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3718         ipvs->sysctl_sync_sock_size = 0;
3719         tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3720         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3721         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3722         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3723         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3724         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3725         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3726         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3727         ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3728         tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3729         ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3730         tbl[idx++].data = &ipvs->sysctl_sync_retries;
3731         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3732
3733
3734         ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3735         if (ipvs->sysctl_hdr == NULL) {
3736                 if (!net_eq(net, &init_net))
3737                         kfree(tbl);
3738                 return -ENOMEM;
3739         }
3740         ip_vs_start_estimator(net, &ipvs->tot_stats);
3741         ipvs->sysctl_tbl = tbl;
3742         /* Schedule defense work */
3743         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3744         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3745
3746         return 0;
3747 }
3748
3749 void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3750 {
3751         struct netns_ipvs *ipvs = net_ipvs(net);
3752
3753         cancel_delayed_work_sync(&ipvs->defense_work);
3754         cancel_work_sync(&ipvs->defense_work.work);
3755         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3756 }
3757
3758 #else
3759
3760 int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3761 void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3762
3763 #endif
3764
3765 static struct notifier_block ip_vs_dst_notifier = {
3766         .notifier_call = ip_vs_dst_event,
3767 };
3768
3769 int __net_init ip_vs_control_net_init(struct net *net)
3770 {
3771         int idx;
3772         struct netns_ipvs *ipvs = net_ipvs(net);
3773
3774         rwlock_init(&ipvs->rs_lock);
3775
3776         /* Initialize rs_table */
3777         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3778                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3779
3780         INIT_LIST_HEAD(&ipvs->dest_trash);
3781         atomic_set(&ipvs->ftpsvc_counter, 0);
3782         atomic_set(&ipvs->nullsvc_counter, 0);
3783
3784         /* procfs stats */
3785         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3786         if (!ipvs->tot_stats.cpustats)
3787                 return -ENOMEM;
3788
3789         spin_lock_init(&ipvs->tot_stats.lock);
3790
3791         proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3792         proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3793         proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3794                              &ip_vs_stats_percpu_fops);
3795
3796         if (ip_vs_control_net_init_sysctl(net))
3797                 goto err;
3798
3799         return 0;
3800
3801 err:
3802         free_percpu(ipvs->tot_stats.cpustats);
3803         return -ENOMEM;
3804 }
3805
3806 void __net_exit ip_vs_control_net_cleanup(struct net *net)
3807 {
3808         struct netns_ipvs *ipvs = net_ipvs(net);
3809
3810         ip_vs_trash_cleanup(net);
3811         ip_vs_stop_estimator(net, &ipvs->tot_stats);
3812         ip_vs_control_net_cleanup_sysctl(net);
3813         proc_net_remove(net, "ip_vs_stats_percpu");
3814         proc_net_remove(net, "ip_vs_stats");
3815         proc_net_remove(net, "ip_vs");
3816         free_percpu(ipvs->tot_stats.cpustats);
3817 }
3818
3819 int __init ip_vs_register_nl_ioctl(void)
3820 {
3821         int ret;
3822
3823         ret = nf_register_sockopt(&ip_vs_sockopts);
3824         if (ret) {
3825                 pr_err("cannot register sockopt.\n");
3826                 goto err_sock;
3827         }
3828
3829         ret = ip_vs_genl_register();
3830         if (ret) {
3831                 pr_err("cannot register Generic Netlink interface.\n");
3832                 goto err_genl;
3833         }
3834         return 0;
3835
3836 err_genl:
3837         nf_unregister_sockopt(&ip_vs_sockopts);
3838 err_sock:
3839         return ret;
3840 }
3841
3842 void ip_vs_unregister_nl_ioctl(void)
3843 {
3844         ip_vs_genl_unregister();
3845         nf_unregister_sockopt(&ip_vs_sockopts);
3846 }
3847
3848 int __init ip_vs_control_init(void)
3849 {
3850         int idx;
3851         int ret;
3852
3853         EnterFunction(2);
3854
3855         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3856         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3857                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3858                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3859         }
3860
3861         smp_wmb();      /* Do we really need it now ? */
3862
3863         ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3864         if (ret < 0)
3865                 return ret;
3866
3867         LeaveFunction(2);
3868         return 0;
3869 }
3870
3871
3872 void ip_vs_control_cleanup(void)
3873 {
3874         EnterFunction(2);
3875         unregister_netdevice_notifier(&ip_vs_dst_notifier);
3876         LeaveFunction(2);
3877 }