Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[platform/adaptation/renesas_rcar/renesas_kernel.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
/* Mutex serializing the IPVS sockopts; [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Reader/writer lock for the service table. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
/* sysctl variables */

#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; static storage, so it starts out as zero. */
static int sysctl_ip_vs_debug_level;

/* Accessor returning the current value of the debug level variable. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
71
72
73 /*  Protos */
74 static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
#ifdef CONFIG_IP_VS_IPV6
/*
 * Tell whether @addr looks like a local IPv6 address in @net.
 *
 * An output route lookup is done for the address; the address is reported
 * as local when the lookup carries no error and the route's device has
 * IFF_LOOPBACK set.  (Approach taken from rt6_fill_node() in
 * net/ipv6/route.c.)
 */
static bool __ip_vs_addr_is_local_v6(struct net *net,
				     const struct in6_addr *addr)
{
	struct flowi6 fl6 = {
		.daddr = *addr,
	};
	struct dst_entry *dst;
	bool local = false;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error && dst->dev)
		local = !!(dst->dev->flags & IFF_LOOPBACK);

	/* done with the route entry */
	dst_release(dst);
	return local;
}
#endif
94
95 #ifdef CONFIG_SYSCTL
96 /*
97  *      update_defense_level is called from keventd and from sysctl,
98  *      so it needs to protect itself from softirqs
99  */
100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102         struct sysinfo i;
103         static int old_secure_tcp = 0;
104         int availmem;
105         int nomem;
106         int to_change = -1;
107
108         /* we only count free and buffered memory (in pages) */
109         si_meminfo(&i);
110         availmem = i.freeram + i.bufferram;
111         /* however in linux 2.5 the i.bufferram is total page cache size,
112            we need adjust it */
113         /* si_swapinfo(&i); */
114         /* availmem = availmem - (i.totalswap - i.freeswap); */
115
116         nomem = (availmem < ipvs->sysctl_amemthresh);
117
118         local_bh_disable();
119
120         /* drop_entry */
121         spin_lock(&ipvs->dropentry_lock);
122         switch (ipvs->sysctl_drop_entry) {
123         case 0:
124                 atomic_set(&ipvs->dropentry, 0);
125                 break;
126         case 1:
127                 if (nomem) {
128                         atomic_set(&ipvs->dropentry, 1);
129                         ipvs->sysctl_drop_entry = 2;
130                 } else {
131                         atomic_set(&ipvs->dropentry, 0);
132                 }
133                 break;
134         case 2:
135                 if (nomem) {
136                         atomic_set(&ipvs->dropentry, 1);
137                 } else {
138                         atomic_set(&ipvs->dropentry, 0);
139                         ipvs->sysctl_drop_entry = 1;
140                 };
141                 break;
142         case 3:
143                 atomic_set(&ipvs->dropentry, 1);
144                 break;
145         }
146         spin_unlock(&ipvs->dropentry_lock);
147
148         /* drop_packet */
149         spin_lock(&ipvs->droppacket_lock);
150         switch (ipvs->sysctl_drop_packet) {
151         case 0:
152                 ipvs->drop_rate = 0;
153                 break;
154         case 1:
155                 if (nomem) {
156                         ipvs->drop_rate = ipvs->drop_counter
157                                 = ipvs->sysctl_amemthresh /
158                                 (ipvs->sysctl_amemthresh-availmem);
159                         ipvs->sysctl_drop_packet = 2;
160                 } else {
161                         ipvs->drop_rate = 0;
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         ipvs->drop_rate = ipvs->drop_counter
167                                 = ipvs->sysctl_amemthresh /
168                                 (ipvs->sysctl_amemthresh-availmem);
169                 } else {
170                         ipvs->drop_rate = 0;
171                         ipvs->sysctl_drop_packet = 1;
172                 }
173                 break;
174         case 3:
175                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
176                 break;
177         }
178         spin_unlock(&ipvs->droppacket_lock);
179
180         /* secure_tcp */
181         spin_lock(&ipvs->securetcp_lock);
182         switch (ipvs->sysctl_secure_tcp) {
183         case 0:
184                 if (old_secure_tcp >= 2)
185                         to_change = 0;
186                 break;
187         case 1:
188                 if (nomem) {
189                         if (old_secure_tcp < 2)
190                                 to_change = 1;
191                         ipvs->sysctl_secure_tcp = 2;
192                 } else {
193                         if (old_secure_tcp >= 2)
194                                 to_change = 0;
195                 }
196                 break;
197         case 2:
198                 if (nomem) {
199                         if (old_secure_tcp < 2)
200                                 to_change = 1;
201                 } else {
202                         if (old_secure_tcp >= 2)
203                                 to_change = 0;
204                         ipvs->sysctl_secure_tcp = 1;
205                 }
206                 break;
207         case 3:
208                 if (old_secure_tcp < 2)
209                         to_change = 1;
210                 break;
211         }
212         old_secure_tcp = ipvs->sysctl_secure_tcp;
213         if (to_change >= 0)
214                 ip_vs_protocol_timeout_change(ipvs,
215                                               ipvs->sysctl_secure_tcp > 1);
216         spin_unlock(&ipvs->securetcp_lock);
217
218         local_bh_enable();
219 }
220
221
222 /*
223  *      Timer for checking the defense
224  */
225 #define DEFENSE_TIMER_PERIOD    1*HZ
226
227 static void defense_work_handler(struct work_struct *work)
228 {
229         struct netns_ipvs *ipvs =
230                 container_of(work, struct netns_ipvs, defense_work.work);
231
232         update_defense_level(ipvs);
233         if (atomic_read(&ipvs->dropentry))
234                 ip_vs_random_dropentry(ipvs->net);
235         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238
239 int
240 ip_vs_use_count_inc(void)
241 {
242         return try_module_get(THIS_MODULE);
243 }
244
245 void
246 ip_vs_use_count_dec(void)
247 {
248         module_put(THIS_MODULE);
249 }
250
251
252 /*
253  *      Hash table: for virtual service lookups
254  */
255 #define IP_VS_SVC_TAB_BITS 8
256 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258
259 /* the service table hashed by <protocol, addr, port> */
260 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261 /* the service table hashed by fwmark */
262 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
264
265 /*
266  *      Returns hash value for virtual service
267  */
268 static inline unsigned int
269 ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
270                   const union nf_inet_addr *addr, __be16 port)
271 {
272         register unsigned int porth = ntohs(port);
273         __be32 addr_fold = addr->ip;
274
275 #ifdef CONFIG_IP_VS_IPV6
276         if (af == AF_INET6)
277                 addr_fold = addr->ip6[0]^addr->ip6[1]^
278                             addr->ip6[2]^addr->ip6[3];
279 #endif
280         addr_fold ^= ((size_t)net>>8);
281
282         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
283                 & IP_VS_SVC_TAB_MASK;
284 }
285
286 /*
287  *      Returns hash value of fwmark for virtual service lookup
288  */
289 static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
290 {
291         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
292 }
293
294 /*
295  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
296  *      or in the ip_vs_svc_fwm_table by fwmark.
297  *      Should be called with locked tables.
298  */
299 static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 {
301         unsigned int hash;
302
303         if (svc->flags & IP_VS_SVC_F_HASHED) {
304                 pr_err("%s(): request for already hashed, called from %pF\n",
305                        __func__, __builtin_return_address(0));
306                 return 0;
307         }
308
309         if (svc->fwmark == 0) {
310                 /*
311                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
312                  */
313                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314                                          &svc->addr, svc->port);
315                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316         } else {
317                 /*
318                  *  Hash it by fwmark in svc_fwm_table
319                  */
320                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322         }
323
324         svc->flags |= IP_VS_SVC_F_HASHED;
325         /* increase its refcnt because it is referenced by the svc table */
326         atomic_inc(&svc->refcnt);
327         return 1;
328 }
329
330
331 /*
332  *      Unhashes a service from svc_table / svc_fwm_table.
333  *      Should be called with locked tables.
334  */
335 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336 {
337         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338                 pr_err("%s(): request for unhash flagged, called from %pF\n",
339                        __func__, __builtin_return_address(0));
340                 return 0;
341         }
342
343         if (svc->fwmark == 0) {
344                 /* Remove it from the svc_table table */
345                 list_del(&svc->s_list);
346         } else {
347                 /* Remove it from the svc_fwm_table table */
348                 list_del(&svc->f_list);
349         }
350
351         svc->flags &= ~IP_VS_SVC_F_HASHED;
352         atomic_dec(&svc->refcnt);
353         return 1;
354 }
355
356
357 /*
358  *      Get service by {netns, proto,addr,port} in the service table.
359  */
360 static inline struct ip_vs_service *
361 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
362                      const union nf_inet_addr *vaddr, __be16 vport)
363 {
364         unsigned int hash;
365         struct ip_vs_service *svc;
366
367         /* Check for "full" addressed entries */
368         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369
370         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371                 if ((svc->af == af)
372                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
373                     && (svc->port == vport)
374                     && (svc->protocol == protocol)
375                     && net_eq(svc->net, net)) {
376                         /* HIT */
377                         return svc;
378                 }
379         }
380
381         return NULL;
382 }
383
384
385 /*
386  *      Get service by {fwmark} in the service table.
387  */
388 static inline struct ip_vs_service *
389 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
390 {
391         unsigned int hash;
392         struct ip_vs_service *svc;
393
394         /* Check for fwmark addressed entries */
395         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396
397         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398                 if (svc->fwmark == fwmark && svc->af == af
399                     && net_eq(svc->net, net)) {
400                         /* HIT */
401                         return svc;
402                 }
403         }
404
405         return NULL;
406 }
407
408 struct ip_vs_service *
409 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
410                   const union nf_inet_addr *vaddr, __be16 vport)
411 {
412         struct ip_vs_service *svc;
413         struct netns_ipvs *ipvs = net_ipvs(net);
414
415         read_lock(&__ip_vs_svc_lock);
416
417         /*
418          *      Check the table hashed by fwmark first
419          */
420         if (fwmark) {
421                 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422                 if (svc)
423                         goto out;
424         }
425
426         /*
427          *      Check the table hashed by <protocol,addr,port>
428          *      for "full" addressed entries
429          */
430         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
431
432         if (svc == NULL
433             && protocol == IPPROTO_TCP
434             && atomic_read(&ipvs->ftpsvc_counter)
435             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
436                 /*
437                  * Check if ftp service entry exists, the packet
438                  * might belong to FTP data connections.
439                  */
440                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
441         }
442
443         if (svc == NULL
444             && atomic_read(&ipvs->nullsvc_counter)) {
445                 /*
446                  * Check if the catch-all port (port zero) exists
447                  */
448                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
449         }
450
451   out:
452         if (svc)
453                 atomic_inc(&svc->usecnt);
454         read_unlock(&__ip_vs_svc_lock);
455
456         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457                       fwmark, ip_vs_proto_name(protocol),
458                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
459                       svc ? "hit" : "not hit");
460
461         return svc;
462 }
463
464
465 static inline void
466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
467 {
468         atomic_inc(&svc->refcnt);
469         dest->svc = svc;
470 }
471
472 static void
473 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
474 {
475         struct ip_vs_service *svc = dest->svc;
476
477         dest->svc = NULL;
478         if (atomic_dec_and_test(&svc->refcnt)) {
479                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
480                               svc->fwmark,
481                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
482                               ntohs(svc->port), atomic_read(&svc->usecnt));
483                 free_percpu(svc->stats.cpustats);
484                 kfree(svc);
485         }
486 }
487
488
489 /*
490  *      Returns hash value for real service
491  */
492 static inline unsigned int ip_vs_rs_hashkey(int af,
493                                             const union nf_inet_addr *addr,
494                                             __be16 port)
495 {
496         register unsigned int porth = ntohs(port);
497         __be32 addr_fold = addr->ip;
498
499 #ifdef CONFIG_IP_VS_IPV6
500         if (af == AF_INET6)
501                 addr_fold = addr->ip6[0]^addr->ip6[1]^
502                             addr->ip6[2]^addr->ip6[3];
503 #endif
504
505         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506                 & IP_VS_RTAB_MASK;
507 }
508
509 /*
510  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
511  *      should be called with locked tables.
512  */
513 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514 {
515         unsigned int hash;
516
517         if (!list_empty(&dest->d_list)) {
518                 return 0;
519         }
520
521         /*
522          *      Hash by proto,addr,port,
523          *      which are the parameters of the real service.
524          */
525         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526
527         list_add(&dest->d_list, &ipvs->rs_table[hash]);
528
529         return 1;
530 }
531
532 /*
533  *      UNhashes ip_vs_dest from rs_table.
534  *      should be called with locked tables.
535  */
536 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537 {
538         /*
539          * Remove it from the rs_table table.
540          */
541         if (!list_empty(&dest->d_list)) {
542                 list_del(&dest->d_list);
543                 INIT_LIST_HEAD(&dest->d_list);
544         }
545
546         return 1;
547 }
548
549 /*
550  *      Lookup real service by <proto,addr,port> in the real service table.
551  */
552 struct ip_vs_dest *
553 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
554                           const union nf_inet_addr *daddr,
555                           __be16 dport)
556 {
557         struct netns_ipvs *ipvs = net_ipvs(net);
558         unsigned int hash;
559         struct ip_vs_dest *dest;
560
561         /*
562          *      Check for "full" addressed entries
563          *      Return the first found entry
564          */
565         hash = ip_vs_rs_hashkey(af, daddr, dport);
566
567         read_lock(&ipvs->rs_lock);
568         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
569                 if ((dest->af == af)
570                     && ip_vs_addr_equal(af, &dest->addr, daddr)
571                     && (dest->port == dport)
572                     && ((dest->protocol == protocol) ||
573                         dest->vfwmark)) {
574                         /* HIT */
575                         read_unlock(&ipvs->rs_lock);
576                         return dest;
577                 }
578         }
579         read_unlock(&ipvs->rs_lock);
580
581         return NULL;
582 }
583
584 /*
585  *      Lookup destination by {addr,port} in the given service
586  */
587 static struct ip_vs_dest *
588 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
589                   __be16 dport)
590 {
591         struct ip_vs_dest *dest;
592
593         /*
594          * Find the destination for the given service
595          */
596         list_for_each_entry(dest, &svc->destinations, n_list) {
597                 if ((dest->af == svc->af)
598                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
599                     && (dest->port == dport)) {
600                         /* HIT */
601                         return dest;
602                 }
603         }
604
605         return NULL;
606 }
607
608 /*
609  * Find destination by {daddr,dport,vaddr,protocol}
610  * Cretaed to be used in ip_vs_process_message() in
611  * the backup synchronization daemon. It finds the
612  * destination to be bound to the received connection
613  * on the backup.
614  *
615  * ip_vs_lookup_real_service() looked promissing, but
616  * seems not working as expected.
617  */
618 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
619                                    const union nf_inet_addr *daddr,
620                                    __be16 dport,
621                                    const union nf_inet_addr *vaddr,
622                                    __be16 vport, __u16 protocol, __u32 fwmark,
623                                    __u32 flags)
624 {
625         struct ip_vs_dest *dest;
626         struct ip_vs_service *svc;
627         __be16 port = dport;
628
629         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
630         if (!svc)
631                 return NULL;
632         if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
633                 port = 0;
634         dest = ip_vs_lookup_dest(svc, daddr, port);
635         if (!dest)
636                 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
637         if (dest)
638                 atomic_inc(&dest->refcnt);
639         ip_vs_service_put(svc);
640         return dest;
641 }
642
643 /*
644  *  Lookup dest by {svc,addr,port} in the destination trash.
645  *  The destination trash is used to hold the destinations that are removed
646  *  from the service table but are still referenced by some conn entries.
647  *  The reason to add the destination trash is when the dest is temporary
648  *  down (either by administrator or by monitor program), the dest can be
649  *  picked back from the trash, the remaining connections to the dest can
650  *  continue, and the counting information of the dest is also useful for
651  *  scheduling.
652  */
653 static struct ip_vs_dest *
654 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
655                      __be16 dport)
656 {
657         struct ip_vs_dest *dest, *nxt;
658         struct netns_ipvs *ipvs = net_ipvs(svc->net);
659
660         /*
661          * Find the destination in trash
662          */
663         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
664                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
665                               "dest->refcnt=%d\n",
666                               dest->vfwmark,
667                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
668                               ntohs(dest->port),
669                               atomic_read(&dest->refcnt));
670                 if (dest->af == svc->af &&
671                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
672                     dest->port == dport &&
673                     dest->vfwmark == svc->fwmark &&
674                     dest->protocol == svc->protocol &&
675                     (svc->fwmark ||
676                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
677                       dest->vport == svc->port))) {
678                         /* HIT */
679                         return dest;
680                 }
681
682                 /*
683                  * Try to purge the destination from trash if not referenced
684                  */
685                 if (atomic_read(&dest->refcnt) == 1) {
686                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
687                                       "from trash\n",
688                                       dest->vfwmark,
689                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
690                                       ntohs(dest->port));
691                         list_del(&dest->n_list);
692                         ip_vs_dst_reset(dest);
693                         __ip_vs_unbind_svc(dest);
694                         free_percpu(dest->stats.cpustats);
695                         kfree(dest);
696                 }
697         }
698
699         return NULL;
700 }
701
702
703 /*
704  *  Clean up all the destinations in the trash
705  *  Called by the ip_vs_control_cleanup()
706  *
707  *  When the ip_vs_control_clearup is activated by ipvs module exit,
708  *  the service tables must have been flushed and all the connections
709  *  are expired, and the refcnt of each destination in the trash must
710  *  be 1, so we simply release them here.
711  */
712 static void ip_vs_trash_cleanup(struct net *net)
713 {
714         struct ip_vs_dest *dest, *nxt;
715         struct netns_ipvs *ipvs = net_ipvs(net);
716
717         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
718                 list_del(&dest->n_list);
719                 ip_vs_dst_reset(dest);
720                 __ip_vs_unbind_svc(dest);
721                 free_percpu(dest->stats.cpustats);
722                 kfree(dest);
723         }
724 }
725
726 static void
727 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
728 {
729 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
730
731         spin_lock_bh(&src->lock);
732
733         IP_VS_SHOW_STATS_COUNTER(conns);
734         IP_VS_SHOW_STATS_COUNTER(inpkts);
735         IP_VS_SHOW_STATS_COUNTER(outpkts);
736         IP_VS_SHOW_STATS_COUNTER(inbytes);
737         IP_VS_SHOW_STATS_COUNTER(outbytes);
738
739         ip_vs_read_estimator(dst, src);
740
741         spin_unlock_bh(&src->lock);
742 }
743
744 static void
745 ip_vs_zero_stats(struct ip_vs_stats *stats)
746 {
747         spin_lock_bh(&stats->lock);
748
749         /* get current counters as zero point, rates are zeroed */
750
751 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
752
753         IP_VS_ZERO_STATS_COUNTER(conns);
754         IP_VS_ZERO_STATS_COUNTER(inpkts);
755         IP_VS_ZERO_STATS_COUNTER(outpkts);
756         IP_VS_ZERO_STATS_COUNTER(inbytes);
757         IP_VS_ZERO_STATS_COUNTER(outbytes);
758
759         ip_vs_zero_estimator(stats);
760
761         spin_unlock_bh(&stats->lock);
762 }
763
764 /*
765  *      Update a destination in the given service
766  */
767 static void
768 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769                     struct ip_vs_dest_user_kern *udest, int add)
770 {
771         struct netns_ipvs *ipvs = net_ipvs(svc->net);
772         int conn_flags;
773
774         /* set the weight and the flags */
775         atomic_set(&dest->weight, udest->weight);
776         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
777         conn_flags |= IP_VS_CONN_F_INACTIVE;
778
779         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
780         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
781                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
782         } else {
783                 /*
784                  *    Put the real service in rs_table if not present.
785                  *    For now only for NAT!
786                  */
787                 write_lock_bh(&ipvs->rs_lock);
788                 ip_vs_rs_hash(ipvs, dest);
789                 write_unlock_bh(&ipvs->rs_lock);
790         }
791         atomic_set(&dest->conn_flags, conn_flags);
792
793         /* bind the service */
794         if (!dest->svc) {
795                 __ip_vs_bind_svc(dest, svc);
796         } else {
797                 if (dest->svc != svc) {
798                         __ip_vs_unbind_svc(dest);
799                         ip_vs_zero_stats(&dest->stats);
800                         __ip_vs_bind_svc(dest, svc);
801                 }
802         }
803
804         /* set the dest status flags */
805         dest->flags |= IP_VS_DEST_F_AVAILABLE;
806
807         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
808                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
809         dest->u_threshold = udest->u_threshold;
810         dest->l_threshold = udest->l_threshold;
811
812         spin_lock_bh(&dest->dst_lock);
813         ip_vs_dst_reset(dest);
814         spin_unlock_bh(&dest->dst_lock);
815
816         if (add)
817                 ip_vs_start_estimator(svc->net, &dest->stats);
818
819         write_lock_bh(&__ip_vs_svc_lock);
820
821         /* Wait until all other svc users go away */
822         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
823
824         if (add) {
825                 list_add(&dest->n_list, &svc->destinations);
826                 svc->num_dests++;
827         }
828
829         /* call the update_service, because server weight may be changed */
830         if (svc->scheduler->update_service)
831                 svc->scheduler->update_service(svc);
832
833         write_unlock_bh(&__ip_vs_svc_lock);
834 }
835
836
837 /*
838  *      Create a destination for the given service
839  */
840 static int
841 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
842                struct ip_vs_dest **dest_p)
843 {
844         struct ip_vs_dest *dest;
845         unsigned int atype;
846
847         EnterFunction(2);
848
849 #ifdef CONFIG_IP_VS_IPV6
850         if (svc->af == AF_INET6) {
851                 atype = ipv6_addr_type(&udest->addr.in6);
852                 if ((!(atype & IPV6_ADDR_UNICAST) ||
853                         atype & IPV6_ADDR_LINKLOCAL) &&
854                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
855                         return -EINVAL;
856         } else
857 #endif
858         {
859                 atype = inet_addr_type(svc->net, udest->addr.ip);
860                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
861                         return -EINVAL;
862         }
863
864         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
865         if (dest == NULL)
866                 return -ENOMEM;
867
868         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
869         if (!dest->stats.cpustats)
870                 goto err_alloc;
871
872         dest->af = svc->af;
873         dest->protocol = svc->protocol;
874         dest->vaddr = svc->addr;
875         dest->vport = svc->port;
876         dest->vfwmark = svc->fwmark;
877         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
878         dest->port = udest->port;
879
880         atomic_set(&dest->activeconns, 0);
881         atomic_set(&dest->inactconns, 0);
882         atomic_set(&dest->persistconns, 0);
883         atomic_set(&dest->refcnt, 1);
884
885         INIT_LIST_HEAD(&dest->d_list);
886         spin_lock_init(&dest->dst_lock);
887         spin_lock_init(&dest->stats.lock);
888         __ip_vs_update_dest(svc, dest, udest, 1);
889
890         *dest_p = dest;
891
892         LeaveFunction(2);
893         return 0;
894
895 err_alloc:
896         kfree(dest);
897         return -ENOMEM;
898 }
899
900
901 /*
902  *      Add a destination into an existing service
903  */
904 static int
905 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
906 {
907         struct ip_vs_dest *dest;
908         union nf_inet_addr daddr;
909         __be16 dport = udest->port;
910         int ret;
911
912         EnterFunction(2);
913
914         if (udest->weight < 0) {
915                 pr_err("%s(): server weight less than zero\n", __func__);
916                 return -ERANGE;
917         }
918
919         if (udest->l_threshold > udest->u_threshold) {
920                 pr_err("%s(): lower threshold is higher than upper threshold\n",
921                         __func__);
922                 return -ERANGE;
923         }
924
925         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
926
927         /*
928          * Check if the dest already exists in the list
929          */
930         dest = ip_vs_lookup_dest(svc, &daddr, dport);
931
932         if (dest != NULL) {
933                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
934                 return -EEXIST;
935         }
936
937         /*
938          * Check if the dest already exists in the trash and
939          * is from the same service
940          */
941         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
942
943         if (dest != NULL) {
944                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
945                               "dest->refcnt=%d, service %u/%s:%u\n",
946                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
947                               atomic_read(&dest->refcnt),
948                               dest->vfwmark,
949                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
950                               ntohs(dest->vport));
951
952                 /*
953                  * Get the destination from the trash
954                  */
955                 list_del(&dest->n_list);
956
957                 __ip_vs_update_dest(svc, dest, udest, 1);
958                 ret = 0;
959         } else {
960                 /*
961                  * Allocate and initialize the dest structure
962                  */
963                 ret = ip_vs_new_dest(svc, udest, &dest);
964         }
965         LeaveFunction(2);
966
967         return ret;
968 }
969
970
971 /*
972  *      Edit a destination in the given service
973  */
974 static int
975 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
976 {
977         struct ip_vs_dest *dest;
978         union nf_inet_addr daddr;
979         __be16 dport = udest->port;
980
981         EnterFunction(2);
982
983         if (udest->weight < 0) {
984                 pr_err("%s(): server weight less than zero\n", __func__);
985                 return -ERANGE;
986         }
987
988         if (udest->l_threshold > udest->u_threshold) {
989                 pr_err("%s(): lower threshold is higher than upper threshold\n",
990                         __func__);
991                 return -ERANGE;
992         }
993
994         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
995
996         /*
997          *  Lookup the destination list
998          */
999         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1000
1001         if (dest == NULL) {
1002                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1003                 return -ENOENT;
1004         }
1005
1006         __ip_vs_update_dest(svc, dest, udest, 0);
1007         LeaveFunction(2);
1008
1009         return 0;
1010 }
1011
1012
1013 /*
1014  *      Delete a destination (must be already unlinked from the service)
1015  */
1016 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1017 {
1018         struct netns_ipvs *ipvs = net_ipvs(net);
1019
1020         ip_vs_stop_estimator(net, &dest->stats);
1021
1022         /*
1023          *  Remove it from the d-linked list with the real services.
1024          */
1025         write_lock_bh(&ipvs->rs_lock);
1026         ip_vs_rs_unhash(dest);
1027         write_unlock_bh(&ipvs->rs_lock);
1028
1029         /*
1030          *  Decrease the refcnt of the dest, and free the dest
1031          *  if nobody refers to it (refcnt=0). Otherwise, throw
1032          *  the destination into the trash.
1033          */
1034         if (atomic_dec_and_test(&dest->refcnt)) {
1035                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1036                               dest->vfwmark,
1037                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1038                               ntohs(dest->port));
1039                 ip_vs_dst_reset(dest);
1040                 /* simply decrease svc->refcnt here, let the caller check
1041                    and release the service if nobody refers to it.
1042                    Only user context can release destination and service,
1043                    and only one user context can update virtual service at a
1044                    time, so the operation here is OK */
1045                 atomic_dec(&dest->svc->refcnt);
1046                 free_percpu(dest->stats.cpustats);
1047                 kfree(dest);
1048         } else {
1049                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1050                               "dest->refcnt=%d\n",
1051                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1052                               ntohs(dest->port),
1053                               atomic_read(&dest->refcnt));
1054                 list_add(&dest->n_list, &ipvs->dest_trash);
1055                 atomic_inc(&dest->refcnt);
1056         }
1057 }
1058
1059
1060 /*
1061  *      Unlink a destination from the given service
1062  */
1063 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1064                                 struct ip_vs_dest *dest,
1065                                 int svcupd)
1066 {
1067         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1068
1069         /*
1070          *  Remove it from the d-linked destination list.
1071          */
1072         list_del(&dest->n_list);
1073         svc->num_dests--;
1074
1075         /*
1076          *  Call the update_service function of its scheduler
1077          */
1078         if (svcupd && svc->scheduler->update_service)
1079                         svc->scheduler->update_service(svc);
1080 }
1081
1082
1083 /*
1084  *      Delete a destination server in the given service
1085  */
1086 static int
1087 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1088 {
1089         struct ip_vs_dest *dest;
1090         __be16 dport = udest->port;
1091
1092         EnterFunction(2);
1093
1094         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1095
1096         if (dest == NULL) {
1097                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1098                 return -ENOENT;
1099         }
1100
1101         write_lock_bh(&__ip_vs_svc_lock);
1102
1103         /*
1104          *      Wait until all other svc users go away.
1105          */
1106         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1107
1108         /*
1109          *      Unlink dest from the service
1110          */
1111         __ip_vs_unlink_dest(svc, dest, 1);
1112
1113         write_unlock_bh(&__ip_vs_svc_lock);
1114
1115         /*
1116          *      Delete the destination
1117          */
1118         __ip_vs_del_dest(svc->net, dest);
1119
1120         LeaveFunction(2);
1121
1122         return 0;
1123 }
1124
1125
1126 /*
1127  *      Add a service into the service hash table
1128  */
1129 static int
1130 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1131                   struct ip_vs_service **svc_p)
1132 {
1133         int ret = 0;
1134         struct ip_vs_scheduler *sched = NULL;
1135         struct ip_vs_pe *pe = NULL;
1136         struct ip_vs_service *svc = NULL;
1137         struct netns_ipvs *ipvs = net_ipvs(net);
1138
1139         /* increase the module use count */
1140         ip_vs_use_count_inc();
1141
1142         /* Lookup the scheduler by 'u->sched_name' */
1143         sched = ip_vs_scheduler_get(u->sched_name);
1144         if (sched == NULL) {
1145                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1146                 ret = -ENOENT;
1147                 goto out_err;
1148         }
1149
1150         if (u->pe_name && *u->pe_name) {
1151                 pe = ip_vs_pe_getbyname(u->pe_name);
1152                 if (pe == NULL) {
1153                         pr_info("persistence engine module ip_vs_pe_%s "
1154                                 "not found\n", u->pe_name);
1155                         ret = -ENOENT;
1156                         goto out_err;
1157                 }
1158         }
1159
1160 #ifdef CONFIG_IP_VS_IPV6
1161         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1162                 ret = -EINVAL;
1163                 goto out_err;
1164         }
1165 #endif
1166
1167         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1168         if (svc == NULL) {
1169                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1170                 ret = -ENOMEM;
1171                 goto out_err;
1172         }
1173         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1174         if (!svc->stats.cpustats) {
1175                 ret = -ENOMEM;
1176                 goto out_err;
1177         }
1178
1179         /* I'm the first user of the service */
1180         atomic_set(&svc->usecnt, 0);
1181         atomic_set(&svc->refcnt, 0);
1182
1183         svc->af = u->af;
1184         svc->protocol = u->protocol;
1185         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1186         svc->port = u->port;
1187         svc->fwmark = u->fwmark;
1188         svc->flags = u->flags;
1189         svc->timeout = u->timeout * HZ;
1190         svc->netmask = u->netmask;
1191         svc->net = net;
1192
1193         INIT_LIST_HEAD(&svc->destinations);
1194         rwlock_init(&svc->sched_lock);
1195         spin_lock_init(&svc->stats.lock);
1196
1197         /* Bind the scheduler */
1198         ret = ip_vs_bind_scheduler(svc, sched);
1199         if (ret)
1200                 goto out_err;
1201         sched = NULL;
1202
1203         /* Bind the ct retriever */
1204         ip_vs_bind_pe(svc, pe);
1205         pe = NULL;
1206
1207         /* Update the virtual service counters */
1208         if (svc->port == FTPPORT)
1209                 atomic_inc(&ipvs->ftpsvc_counter);
1210         else if (svc->port == 0)
1211                 atomic_inc(&ipvs->nullsvc_counter);
1212
1213         ip_vs_start_estimator(net, &svc->stats);
1214
1215         /* Count only IPv4 services for old get/setsockopt interface */
1216         if (svc->af == AF_INET)
1217                 ipvs->num_services++;
1218
1219         /* Hash the service into the service table */
1220         write_lock_bh(&__ip_vs_svc_lock);
1221         ip_vs_svc_hash(svc);
1222         write_unlock_bh(&__ip_vs_svc_lock);
1223
1224         *svc_p = svc;
1225         /* Now there is a service - full throttle */
1226         ipvs->enable = 1;
1227         return 0;
1228
1229
1230  out_err:
1231         if (svc != NULL) {
1232                 ip_vs_unbind_scheduler(svc);
1233                 if (svc->inc) {
1234                         local_bh_disable();
1235                         ip_vs_app_inc_put(svc->inc);
1236                         local_bh_enable();
1237                 }
1238                 if (svc->stats.cpustats)
1239                         free_percpu(svc->stats.cpustats);
1240                 kfree(svc);
1241         }
1242         ip_vs_scheduler_put(sched);
1243         ip_vs_pe_put(pe);
1244
1245         /* decrease the module use count */
1246         ip_vs_use_count_dec();
1247
1248         return ret;
1249 }
1250
1251
1252 /*
1253  *      Edit a service and bind it with a new scheduler
1254  */
1255 static int
1256 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1257 {
1258         struct ip_vs_scheduler *sched, *old_sched;
1259         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1260         int ret = 0;
1261
1262         /*
1263          * Lookup the scheduler, by 'u->sched_name'
1264          */
1265         sched = ip_vs_scheduler_get(u->sched_name);
1266         if (sched == NULL) {
1267                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1268                 return -ENOENT;
1269         }
1270         old_sched = sched;
1271
1272         if (u->pe_name && *u->pe_name) {
1273                 pe = ip_vs_pe_getbyname(u->pe_name);
1274                 if (pe == NULL) {
1275                         pr_info("persistence engine module ip_vs_pe_%s "
1276                                 "not found\n", u->pe_name);
1277                         ret = -ENOENT;
1278                         goto out;
1279                 }
1280                 old_pe = pe;
1281         }
1282
1283 #ifdef CONFIG_IP_VS_IPV6
1284         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1285                 ret = -EINVAL;
1286                 goto out;
1287         }
1288 #endif
1289
1290         write_lock_bh(&__ip_vs_svc_lock);
1291
1292         /*
1293          * Wait until all other svc users go away.
1294          */
1295         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1296
1297         /*
1298          * Set the flags and timeout value
1299          */
1300         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1301         svc->timeout = u->timeout * HZ;
1302         svc->netmask = u->netmask;
1303
1304         old_sched = svc->scheduler;
1305         if (sched != old_sched) {
1306                 /*
1307                  * Unbind the old scheduler
1308                  */
1309                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1310                         old_sched = sched;
1311                         goto out_unlock;
1312                 }
1313
1314                 /*
1315                  * Bind the new scheduler
1316                  */
1317                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1318                         /*
1319                          * If ip_vs_bind_scheduler fails, restore the old
1320                          * scheduler.
1321                          * The main reason of failure is out of memory.
1322                          *
1323                          * The question is if the old scheduler can be
1324                          * restored all the time. TODO: if it cannot be
1325                          * restored some time, we must delete the service,
1326                          * otherwise the system may crash.
1327                          */
1328                         ip_vs_bind_scheduler(svc, old_sched);
1329                         old_sched = sched;
1330                         goto out_unlock;
1331                 }
1332         }
1333
1334         old_pe = svc->pe;
1335         if (pe != old_pe) {
1336                 ip_vs_unbind_pe(svc);
1337                 ip_vs_bind_pe(svc, pe);
1338         }
1339
1340 out_unlock:
1341         write_unlock_bh(&__ip_vs_svc_lock);
1342 out:
1343         ip_vs_scheduler_put(old_sched);
1344         ip_vs_pe_put(old_pe);
1345         return ret;
1346 }
1347
1348
1349 /*
1350  *      Delete a service from the service list
1351  *      - The service must be unlinked, unlocked and not referenced!
1352  *      - We are called under _bh lock
1353  */
1354 static void __ip_vs_del_service(struct ip_vs_service *svc)
1355 {
1356         struct ip_vs_dest *dest, *nxt;
1357         struct ip_vs_scheduler *old_sched;
1358         struct ip_vs_pe *old_pe;
1359         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1360
1361         pr_info("%s: enter\n", __func__);
1362
1363         /* Count only IPv4 services for old get/setsockopt interface */
1364         if (svc->af == AF_INET)
1365                 ipvs->num_services--;
1366
1367         ip_vs_stop_estimator(svc->net, &svc->stats);
1368
1369         /* Unbind scheduler */
1370         old_sched = svc->scheduler;
1371         ip_vs_unbind_scheduler(svc);
1372         ip_vs_scheduler_put(old_sched);
1373
1374         /* Unbind persistence engine */
1375         old_pe = svc->pe;
1376         ip_vs_unbind_pe(svc);
1377         ip_vs_pe_put(old_pe);
1378
1379         /* Unbind app inc */
1380         if (svc->inc) {
1381                 ip_vs_app_inc_put(svc->inc);
1382                 svc->inc = NULL;
1383         }
1384
1385         /*
1386          *    Unlink the whole destination list
1387          */
1388         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1389                 __ip_vs_unlink_dest(svc, dest, 0);
1390                 __ip_vs_del_dest(svc->net, dest);
1391         }
1392
1393         /*
1394          *    Update the virtual service counters
1395          */
1396         if (svc->port == FTPPORT)
1397                 atomic_dec(&ipvs->ftpsvc_counter);
1398         else if (svc->port == 0)
1399                 atomic_dec(&ipvs->nullsvc_counter);
1400
1401         /*
1402          *    Free the service if nobody refers to it
1403          */
1404         if (atomic_read(&svc->refcnt) == 0) {
1405                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1406                               svc->fwmark,
1407                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1408                               ntohs(svc->port), atomic_read(&svc->usecnt));
1409                 free_percpu(svc->stats.cpustats);
1410                 kfree(svc);
1411         }
1412
1413         /* decrease the module use count */
1414         ip_vs_use_count_dec();
1415 }
1416
1417 /*
1418  * Unlink a service from list and try to delete it if its refcnt reached 0
1419  */
1420 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1421 {
1422         /*
1423          * Unhash it from the service table
1424          */
1425         write_lock_bh(&__ip_vs_svc_lock);
1426
1427         ip_vs_svc_unhash(svc);
1428
1429         /*
1430          * Wait until all the svc users go away.
1431          */
1432         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1433
1434         __ip_vs_del_service(svc);
1435
1436         write_unlock_bh(&__ip_vs_svc_lock);
1437 }
1438
1439 /*
1440  *      Delete a service from the service list
1441  */
1442 static int ip_vs_del_service(struct ip_vs_service *svc)
1443 {
1444         if (svc == NULL)
1445                 return -EEXIST;
1446         ip_vs_unlink_service(svc);
1447
1448         return 0;
1449 }
1450
1451
1452 /*
1453  *      Flush all the virtual services
1454  */
1455 static int ip_vs_flush(struct net *net)
1456 {
1457         int idx;
1458         struct ip_vs_service *svc, *nxt;
1459
1460         /*
1461          * Flush the service table hashed by <netns,protocol,addr,port>
1462          */
1463         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1464                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1465                                          s_list) {
1466                         if (net_eq(svc->net, net))
1467                                 ip_vs_unlink_service(svc);
1468                 }
1469         }
1470
1471         /*
1472          * Flush the service table hashed by fwmark
1473          */
1474         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1475                 list_for_each_entry_safe(svc, nxt,
1476                                          &ip_vs_svc_fwm_table[idx], f_list) {
1477                         if (net_eq(svc->net, net))
1478                                 ip_vs_unlink_service(svc);
1479                 }
1480         }
1481
1482         return 0;
1483 }
1484
1485 /*
1486  *      Delete service by {netns} in the service table.
1487  *      Called by __ip_vs_cleanup()
1488  */
1489 void ip_vs_service_net_cleanup(struct net *net)
1490 {
1491         EnterFunction(2);
1492         /* Check for "full" addressed entries */
1493         mutex_lock(&__ip_vs_mutex);
1494         ip_vs_flush(net);
1495         mutex_unlock(&__ip_vs_mutex);
1496         LeaveFunction(2);
1497 }
1498 /*
1499  * Release dst hold by dst_cache
1500  */
1501 static inline void
1502 __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1503 {
1504         spin_lock_bh(&dest->dst_lock);
1505         if (dest->dst_cache && dest->dst_cache->dev == dev) {
1506                 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1507                               dev->name,
1508                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1509                               ntohs(dest->port),
1510                               atomic_read(&dest->refcnt));
1511                 ip_vs_dst_reset(dest);
1512         }
1513         spin_unlock_bh(&dest->dst_lock);
1514
1515 }
1516 /*
1517  * Netdev event receiver
1518  * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1519  * a device that is "unregister" it must be released.
1520  */
1521 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1522                             void *ptr)
1523 {
1524         struct net_device *dev = ptr;
1525         struct net *net = dev_net(dev);
1526         struct netns_ipvs *ipvs = net_ipvs(net);
1527         struct ip_vs_service *svc;
1528         struct ip_vs_dest *dest;
1529         unsigned int idx;
1530
1531         if (event != NETDEV_UNREGISTER || !ipvs)
1532                 return NOTIFY_DONE;
1533         IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1534         EnterFunction(2);
1535         mutex_lock(&__ip_vs_mutex);
1536         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1537                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1538                         if (net_eq(svc->net, net)) {
1539                                 list_for_each_entry(dest, &svc->destinations,
1540                                                     n_list) {
1541                                         __ip_vs_dev_reset(dest, dev);
1542                                 }
1543                         }
1544                 }
1545
1546                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1547                         if (net_eq(svc->net, net)) {
1548                                 list_for_each_entry(dest, &svc->destinations,
1549                                                     n_list) {
1550                                         __ip_vs_dev_reset(dest, dev);
1551                                 }
1552                         }
1553
1554                 }
1555         }
1556
1557         list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
1558                 __ip_vs_dev_reset(dest, dev);
1559         }
1560         mutex_unlock(&__ip_vs_mutex);
1561         LeaveFunction(2);
1562         return NOTIFY_DONE;
1563 }
1564
1565 /*
1566  *      Zero counters in a service or all services
1567  */
1568 static int ip_vs_zero_service(struct ip_vs_service *svc)
1569 {
1570         struct ip_vs_dest *dest;
1571
1572         write_lock_bh(&__ip_vs_svc_lock);
1573         list_for_each_entry(dest, &svc->destinations, n_list) {
1574                 ip_vs_zero_stats(&dest->stats);
1575         }
1576         ip_vs_zero_stats(&svc->stats);
1577         write_unlock_bh(&__ip_vs_svc_lock);
1578         return 0;
1579 }
1580
1581 static int ip_vs_zero_all(struct net *net)
1582 {
1583         int idx;
1584         struct ip_vs_service *svc;
1585
1586         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1587                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1588                         if (net_eq(svc->net, net))
1589                                 ip_vs_zero_service(svc);
1590                 }
1591         }
1592
1593         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1594                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1595                         if (net_eq(svc->net, net))
1596                                 ip_vs_zero_service(svc);
1597                 }
1598         }
1599
1600         ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1601         return 0;
1602 }
1603
1604 #ifdef CONFIG_SYSCTL
1605
/* Bounds referenced through .extra1/.extra2 by the "sync_retries" entry */
static int three = 3;
static int zero;
1608
1609 static int
1610 proc_do_defense_mode(ctl_table *table, int write,
1611                      void __user *buffer, size_t *lenp, loff_t *ppos)
1612 {
1613         struct net *net = current->nsproxy->net_ns;
1614         int *valp = table->data;
1615         int val = *valp;
1616         int rc;
1617
1618         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1619         if (write && (*valp != val)) {
1620                 if ((*valp < 0) || (*valp > 3)) {
1621                         /* Restore the correct value */
1622                         *valp = val;
1623                 } else {
1624                         update_defense_level(net_ipvs(net));
1625                 }
1626         }
1627         return rc;
1628 }
1629
1630 static int
1631 proc_do_sync_threshold(ctl_table *table, int write,
1632                        void __user *buffer, size_t *lenp, loff_t *ppos)
1633 {
1634         int *valp = table->data;
1635         int val[2];
1636         int rc;
1637
1638         /* backup the value first */
1639         memcpy(val, valp, sizeof(val));
1640
1641         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1642         if (write && (valp[0] < 0 || valp[1] < 0 ||
1643             (valp[0] >= valp[1] && valp[1]))) {
1644                 /* Restore the correct value */
1645                 memcpy(valp, val, sizeof(val));
1646         }
1647         return rc;
1648 }
1649
1650 static int
1651 proc_do_sync_mode(ctl_table *table, int write,
1652                      void __user *buffer, size_t *lenp, loff_t *ppos)
1653 {
1654         int *valp = table->data;
1655         int val = *valp;
1656         int rc;
1657
1658         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1659         if (write && (*valp != val)) {
1660                 if ((*valp < 0) || (*valp > 1)) {
1661                         /* Restore the correct value */
1662                         *valp = val;
1663                 }
1664         }
1665         return rc;
1666 }
1667
1668 static int
1669 proc_do_sync_ports(ctl_table *table, int write,
1670                    void __user *buffer, size_t *lenp, loff_t *ppos)
1671 {
1672         int *valp = table->data;
1673         int val = *valp;
1674         int rc;
1675
1676         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1677         if (write && (*valp != val)) {
1678                 if (*valp < 1 || !is_power_of_2(*valp)) {
1679                         /* Restore the correct value */
1680                         *valp = val;
1681                 }
1682         }
1683         return rc;
1684 }
1685
/*
 *      IPVS sysctl table (under /proc/sys/net/ipv4/vs/)
 *      Do not reorder the entries or insert new ones without making the
 *      matching change to the netns init in ip_vs_control_net_init().
 */
1691
1692 static struct ctl_table vs_vars[] = {
1693         {
1694                 .procname       = "amemthresh",
1695                 .maxlen         = sizeof(int),
1696                 .mode           = 0644,
1697                 .proc_handler   = proc_dointvec,
1698         },
1699         {
1700                 .procname       = "am_droprate",
1701                 .maxlen         = sizeof(int),
1702                 .mode           = 0644,
1703                 .proc_handler   = proc_dointvec,
1704         },
1705         {
1706                 .procname       = "drop_entry",
1707                 .maxlen         = sizeof(int),
1708                 .mode           = 0644,
1709                 .proc_handler   = proc_do_defense_mode,
1710         },
1711         {
1712                 .procname       = "drop_packet",
1713                 .maxlen         = sizeof(int),
1714                 .mode           = 0644,
1715                 .proc_handler   = proc_do_defense_mode,
1716         },
1717 #ifdef CONFIG_IP_VS_NFCT
1718         {
1719                 .procname       = "conntrack",
1720                 .maxlen         = sizeof(int),
1721                 .mode           = 0644,
1722                 .proc_handler   = &proc_dointvec,
1723         },
1724 #endif
1725         {
1726                 .procname       = "secure_tcp",
1727                 .maxlen         = sizeof(int),
1728                 .mode           = 0644,
1729                 .proc_handler   = proc_do_defense_mode,
1730         },
1731         {
1732                 .procname       = "snat_reroute",
1733                 .maxlen         = sizeof(int),
1734                 .mode           = 0644,
1735                 .proc_handler   = &proc_dointvec,
1736         },
1737         {
1738                 .procname       = "sync_version",
1739                 .maxlen         = sizeof(int),
1740                 .mode           = 0644,
1741                 .proc_handler   = &proc_do_sync_mode,
1742         },
1743         {
1744                 .procname       = "sync_ports",
1745                 .maxlen         = sizeof(int),
1746                 .mode           = 0644,
1747                 .proc_handler   = &proc_do_sync_ports,
1748         },
1749         {
1750                 .procname       = "sync_qlen_max",
1751                 .maxlen         = sizeof(int),
1752                 .mode           = 0644,
1753                 .proc_handler   = proc_dointvec,
1754         },
1755         {
1756                 .procname       = "sync_sock_size",
1757                 .maxlen         = sizeof(int),
1758                 .mode           = 0644,
1759                 .proc_handler   = proc_dointvec,
1760         },
1761         {
1762                 .procname       = "cache_bypass",
1763                 .maxlen         = sizeof(int),
1764                 .mode           = 0644,
1765                 .proc_handler   = proc_dointvec,
1766         },
1767         {
1768                 .procname       = "expire_nodest_conn",
1769                 .maxlen         = sizeof(int),
1770                 .mode           = 0644,
1771                 .proc_handler   = proc_dointvec,
1772         },
1773         {
1774                 .procname       = "expire_quiescent_template",
1775                 .maxlen         = sizeof(int),
1776                 .mode           = 0644,
1777                 .proc_handler   = proc_dointvec,
1778         },
1779         {
1780                 .procname       = "sync_threshold",
1781                 .maxlen         =
1782                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1783                 .mode           = 0644,
1784                 .proc_handler   = proc_do_sync_threshold,
1785         },
1786         {
1787                 .procname       = "sync_refresh_period",
1788                 .maxlen         = sizeof(int),
1789                 .mode           = 0644,
1790                 .proc_handler   = proc_dointvec_jiffies,
1791         },
1792         {
1793                 .procname       = "sync_retries",
1794                 .maxlen         = sizeof(int),
1795                 .mode           = 0644,
1796                 .proc_handler   = proc_dointvec_minmax,
1797                 .extra1         = &zero,
1798                 .extra2         = &three,
1799         },
1800         {
1801                 .procname       = "nat_icmp_send",
1802                 .maxlen         = sizeof(int),
1803                 .mode           = 0644,
1804                 .proc_handler   = proc_dointvec,
1805         },
1806         {
1807                 .procname       = "pmtu_disc",
1808                 .maxlen         = sizeof(int),
1809                 .mode           = 0644,
1810                 .proc_handler   = proc_dointvec,
1811         },
1812 #ifdef CONFIG_IP_VS_DEBUG
1813         {
1814                 .procname       = "debug_level",
1815                 .data           = &sysctl_ip_vs_debug_level,
1816                 .maxlen         = sizeof(int),
1817                 .mode           = 0644,
1818                 .proc_handler   = proc_dointvec,
1819         },
1820 #endif
1821 #if 0
1822         {
1823                 .procname       = "timeout_established",
1824                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1825                 .maxlen         = sizeof(int),
1826                 .mode           = 0644,
1827                 .proc_handler   = proc_dointvec_jiffies,
1828         },
1829         {
1830                 .procname       = "timeout_synsent",
1831                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1832                 .maxlen         = sizeof(int),
1833                 .mode           = 0644,
1834                 .proc_handler   = proc_dointvec_jiffies,
1835         },
1836         {
1837                 .procname       = "timeout_synrecv",
1838                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1839                 .maxlen         = sizeof(int),
1840                 .mode           = 0644,
1841                 .proc_handler   = proc_dointvec_jiffies,
1842         },
1843         {
1844                 .procname       = "timeout_finwait",
1845                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1846                 .maxlen         = sizeof(int),
1847                 .mode           = 0644,
1848                 .proc_handler   = proc_dointvec_jiffies,
1849         },
1850         {
1851                 .procname       = "timeout_timewait",
1852                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1853                 .maxlen         = sizeof(int),
1854                 .mode           = 0644,
1855                 .proc_handler   = proc_dointvec_jiffies,
1856         },
1857         {
1858                 .procname       = "timeout_close",
1859                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1860                 .maxlen         = sizeof(int),
1861                 .mode           = 0644,
1862                 .proc_handler   = proc_dointvec_jiffies,
1863         },
1864         {
1865                 .procname       = "timeout_closewait",
1866                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1867                 .maxlen         = sizeof(int),
1868                 .mode           = 0644,
1869                 .proc_handler   = proc_dointvec_jiffies,
1870         },
1871         {
1872                 .procname       = "timeout_lastack",
1873                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1874                 .maxlen         = sizeof(int),
1875                 .mode           = 0644,
1876                 .proc_handler   = proc_dointvec_jiffies,
1877         },
1878         {
1879                 .procname       = "timeout_listen",
1880                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1881                 .maxlen         = sizeof(int),
1882                 .mode           = 0644,
1883                 .proc_handler   = proc_dointvec_jiffies,
1884         },
1885         {
1886                 .procname       = "timeout_synack",
1887                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1888                 .maxlen         = sizeof(int),
1889                 .mode           = 0644,
1890                 .proc_handler   = proc_dointvec_jiffies,
1891         },
1892         {
1893                 .procname       = "timeout_udp",
1894                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1895                 .maxlen         = sizeof(int),
1896                 .mode           = 0644,
1897                 .proc_handler   = proc_dointvec_jiffies,
1898         },
1899         {
1900                 .procname       = "timeout_icmp",
1901                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1902                 .maxlen         = sizeof(int),
1903                 .mode           = 0644,
1904                 .proc_handler   = proc_dointvec_jiffies,
1905         },
1906 #endif
1907         { }
1908 };
1909
1910 #endif
1911
1912 #ifdef CONFIG_PROC_FS
1913
1914 struct ip_vs_iter {
1915         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1916         struct list_head *table;
1917         int bucket;
1918 };
1919
1920 /*
1921  *      Write the contents of the VS rule table to a PROCfs file.
1922  *      (It is kept just for backward compatibility)
1923  */
1924 static inline const char *ip_vs_fwd_name(unsigned int flags)
1925 {
1926         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1927         case IP_VS_CONN_F_LOCALNODE:
1928                 return "Local";
1929         case IP_VS_CONN_F_TUNNEL:
1930                 return "Tunnel";
1931         case IP_VS_CONN_F_DROUTE:
1932                 return "Route";
1933         default:
1934                 return "Masq";
1935         }
1936 }
1937
1938
1939 /* Get the Nth entry in the two lists */
1940 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1941 {
1942         struct net *net = seq_file_net(seq);
1943         struct ip_vs_iter *iter = seq->private;
1944         int idx;
1945         struct ip_vs_service *svc;
1946
1947         /* look in hash by protocol */
1948         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1949                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1950                         if (net_eq(svc->net, net) && pos-- == 0) {
1951                                 iter->table = ip_vs_svc_table;
1952                                 iter->bucket = idx;
1953                                 return svc;
1954                         }
1955                 }
1956         }
1957
1958         /* keep looking in fwmark */
1959         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1960                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1961                         if (net_eq(svc->net, net) && pos-- == 0) {
1962                                 iter->table = ip_vs_svc_fwm_table;
1963                                 iter->bucket = idx;
1964                                 return svc;
1965                         }
1966                 }
1967         }
1968
1969         return NULL;
1970 }
1971
1972 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1973 __acquires(__ip_vs_svc_lock)
1974 {
1975
1976         read_lock_bh(&__ip_vs_svc_lock);
1977         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1978 }
1979
1980
1981 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1982 {
1983         struct list_head *e;
1984         struct ip_vs_iter *iter;
1985         struct ip_vs_service *svc;
1986
1987         ++*pos;
1988         if (v == SEQ_START_TOKEN)
1989                 return ip_vs_info_array(seq,0);
1990
1991         svc = v;
1992         iter = seq->private;
1993
1994         if (iter->table == ip_vs_svc_table) {
1995                 /* next service in table hashed by protocol */
1996                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1997                         return list_entry(e, struct ip_vs_service, s_list);
1998
1999
2000                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2001                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
2002                                             s_list) {
2003                                 return svc;
2004                         }
2005                 }
2006
2007                 iter->table = ip_vs_svc_fwm_table;
2008                 iter->bucket = -1;
2009                 goto scan_fwmark;
2010         }
2011
2012         /* next service in hashed by fwmark */
2013         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2014                 return list_entry(e, struct ip_vs_service, f_list);
2015
2016  scan_fwmark:
2017         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2018                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2019                                     f_list)
2020                         return svc;
2021         }
2022
2023         return NULL;
2024 }
2025
2026 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2027 __releases(__ip_vs_svc_lock)
2028 {
2029         read_unlock_bh(&__ip_vs_svc_lock);
2030 }
2031
2032
2033 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2034 {
2035         if (v == SEQ_START_TOKEN) {
2036                 seq_printf(seq,
2037                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
2038                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2039                 seq_puts(seq,
2040                          "Prot LocalAddress:Port Scheduler Flags\n");
2041                 seq_puts(seq,
2042                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2043         } else {
2044                 const struct ip_vs_service *svc = v;
2045                 const struct ip_vs_iter *iter = seq->private;
2046                 const struct ip_vs_dest *dest;
2047
2048                 if (iter->table == ip_vs_svc_table) {
2049 #ifdef CONFIG_IP_VS_IPV6
2050                         if (svc->af == AF_INET6)
2051                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
2052                                            ip_vs_proto_name(svc->protocol),
2053                                            &svc->addr.in6,
2054                                            ntohs(svc->port),
2055                                            svc->scheduler->name);
2056                         else
2057 #endif
2058                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
2059                                            ip_vs_proto_name(svc->protocol),
2060                                            ntohl(svc->addr.ip),
2061                                            ntohs(svc->port),
2062                                            svc->scheduler->name,
2063                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2064                 } else {
2065                         seq_printf(seq, "FWM  %08X %s %s",
2066                                    svc->fwmark, svc->scheduler->name,
2067                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2068                 }
2069
2070                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2071                         seq_printf(seq, "persistent %d %08X\n",
2072                                 svc->timeout,
2073                                 ntohl(svc->netmask));
2074                 else
2075                         seq_putc(seq, '\n');
2076
2077                 list_for_each_entry(dest, &svc->destinations, n_list) {
2078 #ifdef CONFIG_IP_VS_IPV6
2079                         if (dest->af == AF_INET6)
2080                                 seq_printf(seq,
2081                                            "  -> [%pI6]:%04X"
2082                                            "      %-7s %-6d %-10d %-10d\n",
2083                                            &dest->addr.in6,
2084                                            ntohs(dest->port),
2085                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2086                                            atomic_read(&dest->weight),
2087                                            atomic_read(&dest->activeconns),
2088                                            atomic_read(&dest->inactconns));
2089                         else
2090 #endif
2091                                 seq_printf(seq,
2092                                            "  -> %08X:%04X      "
2093                                            "%-7s %-6d %-10d %-10d\n",
2094                                            ntohl(dest->addr.ip),
2095                                            ntohs(dest->port),
2096                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2097                                            atomic_read(&dest->weight),
2098                                            atomic_read(&dest->activeconns),
2099                                            atomic_read(&dest->inactconns));
2100
2101                 }
2102         }
2103         return 0;
2104 }
2105
2106 static const struct seq_operations ip_vs_info_seq_ops = {
2107         .start = ip_vs_info_seq_start,
2108         .next  = ip_vs_info_seq_next,
2109         .stop  = ip_vs_info_seq_stop,
2110         .show  = ip_vs_info_seq_show,
2111 };
2112
2113 static int ip_vs_info_open(struct inode *inode, struct file *file)
2114 {
2115         return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2116                         sizeof(struct ip_vs_iter));
2117 }
2118
2119 static const struct file_operations ip_vs_info_fops = {
2120         .owner   = THIS_MODULE,
2121         .open    = ip_vs_info_open,
2122         .read    = seq_read,
2123         .llseek  = seq_lseek,
2124         .release = seq_release_net,
2125 };
2126
2127 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2128 {
2129         struct net *net = seq_file_single_net(seq);
2130         struct ip_vs_stats_user show;
2131
2132 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2133         seq_puts(seq,
2134                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2135         seq_printf(seq,
2136                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2137
2138         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2139         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2140                    show.inpkts, show.outpkts,
2141                    (unsigned long long) show.inbytes,
2142                    (unsigned long long) show.outbytes);
2143
2144 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2145         seq_puts(seq,
2146                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2147         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2148                         show.cps, show.inpps, show.outpps,
2149                         show.inbps, show.outbps);
2150
2151         return 0;
2152 }
2153
2154 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2155 {
2156         return single_open_net(inode, file, ip_vs_stats_show);
2157 }
2158
2159 static const struct file_operations ip_vs_stats_fops = {
2160         .owner = THIS_MODULE,
2161         .open = ip_vs_stats_seq_open,
2162         .read = seq_read,
2163         .llseek = seq_lseek,
2164         .release = single_release_net,
2165 };
2166
2167 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2168 {
2169         struct net *net = seq_file_single_net(seq);
2170         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2171         struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2172         struct ip_vs_stats_user rates;
2173         int i;
2174
2175 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2176         seq_puts(seq,
2177                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2178         seq_printf(seq,
2179                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2180
2181         for_each_possible_cpu(i) {
2182                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2183                 unsigned int start;
2184                 __u64 inbytes, outbytes;
2185
2186                 do {
2187                         start = u64_stats_fetch_begin_bh(&u->syncp);
2188                         inbytes = u->ustats.inbytes;
2189                         outbytes = u->ustats.outbytes;
2190                 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2191
2192                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2193                            i, u->ustats.conns, u->ustats.inpkts,
2194                            u->ustats.outpkts, (__u64)inbytes,
2195                            (__u64)outbytes);
2196         }
2197
2198         spin_lock_bh(&tot_stats->lock);
2199
2200         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2201                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2202                    tot_stats->ustats.outpkts,
2203                    (unsigned long long) tot_stats->ustats.inbytes,
2204                    (unsigned long long) tot_stats->ustats.outbytes);
2205
2206         ip_vs_read_estimator(&rates, tot_stats);
2207
2208         spin_unlock_bh(&tot_stats->lock);
2209
2210 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2211         seq_puts(seq,
2212                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2213         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2214                         rates.cps,
2215                         rates.inpps,
2216                         rates.outpps,
2217                         rates.inbps,
2218                         rates.outbps);
2219
2220         return 0;
2221 }
2222
2223 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2224 {
2225         return single_open_net(inode, file, ip_vs_stats_percpu_show);
2226 }
2227
2228 static const struct file_operations ip_vs_stats_percpu_fops = {
2229         .owner = THIS_MODULE,
2230         .open = ip_vs_stats_percpu_seq_open,
2231         .read = seq_read,
2232         .llseek = seq_lseek,
2233         .release = single_release_net,
2234 };
2235 #endif
2236
2237 /*
2238  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2239  */
2240 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2241 {
2242 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2243         struct ip_vs_proto_data *pd;
2244 #endif
2245
2246         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2247                   u->tcp_timeout,
2248                   u->tcp_fin_timeout,
2249                   u->udp_timeout);
2250
2251 #ifdef CONFIG_IP_VS_PROTO_TCP
2252         if (u->tcp_timeout) {
2253                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2254                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2255                         = u->tcp_timeout * HZ;
2256         }
2257
2258         if (u->tcp_fin_timeout) {
2259                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2260                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2261                         = u->tcp_fin_timeout * HZ;
2262         }
2263 #endif
2264
2265 #ifdef CONFIG_IP_VS_PROTO_UDP
2266         if (u->udp_timeout) {
2267                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2268                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2269                         = u->udp_timeout * HZ;
2270         }
2271 #endif
2272         return 0;
2273 }
2274
2275
/*
 *      SET sockopt commands are numbered upwards from IP_VS_BASE_CTL;
 *      SET_CMDID() turns a command into a 0-based index for set_arglen[].
 *      The argument is parenthesized so that any expression can be passed.
 *      The *_ARG_LEN macros give the exact argument size each command
 *      expects; MAX_ARG_LEN is the largest of them.
 */
#define SET_CMDID(cmd)          ((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
                                 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN             SVCDEST_ARG_LEN
2283
2284 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2285         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2286         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2287         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2288         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2289         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2290         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2291         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2292         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2293         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2294         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2295         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2296 };
2297
2298 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2299                                   struct ip_vs_service_user *usvc_compat)
2300 {
2301         memset(usvc, 0, sizeof(*usvc));
2302
2303         usvc->af                = AF_INET;
2304         usvc->protocol          = usvc_compat->protocol;
2305         usvc->addr.ip           = usvc_compat->addr;
2306         usvc->port              = usvc_compat->port;
2307         usvc->fwmark            = usvc_compat->fwmark;
2308
2309         /* Deep copy of sched_name is not needed here */
2310         usvc->sched_name        = usvc_compat->sched_name;
2311
2312         usvc->flags             = usvc_compat->flags;
2313         usvc->timeout           = usvc_compat->timeout;
2314         usvc->netmask           = usvc_compat->netmask;
2315 }
2316
2317 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2318                                    struct ip_vs_dest_user *udest_compat)
2319 {
2320         memset(udest, 0, sizeof(*udest));
2321
2322         udest->addr.ip          = udest_compat->addr;
2323         udest->port             = udest_compat->port;
2324         udest->conn_flags       = udest_compat->conn_flags;
2325         udest->weight           = udest_compat->weight;
2326         udest->u_threshold      = udest_compat->u_threshold;
2327         udest->l_threshold      = udest_compat->l_threshold;
2328 }
2329
2330 static int
2331 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2332 {
2333         struct net *net = sock_net(sk);
2334         int ret;
2335         unsigned char arg[MAX_ARG_LEN];
2336         struct ip_vs_service_user *usvc_compat;
2337         struct ip_vs_service_user_kern usvc;
2338         struct ip_vs_service *svc;
2339         struct ip_vs_dest_user *udest_compat;
2340         struct ip_vs_dest_user_kern udest;
2341         struct netns_ipvs *ipvs = net_ipvs(net);
2342
2343         if (!capable(CAP_NET_ADMIN))
2344                 return -EPERM;
2345
2346         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2347                 return -EINVAL;
2348         if (len < 0 || len >  MAX_ARG_LEN)
2349                 return -EINVAL;
2350         if (len != set_arglen[SET_CMDID(cmd)]) {
2351                 pr_err("set_ctl: len %u != %u\n",
2352                        len, set_arglen[SET_CMDID(cmd)]);
2353                 return -EINVAL;
2354         }
2355
2356         if (copy_from_user(arg, user, len) != 0)
2357                 return -EFAULT;
2358
2359         /* increase the module use count */
2360         ip_vs_use_count_inc();
2361
2362         /* Handle daemons since they have another lock */
2363         if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2364             cmd == IP_VS_SO_SET_STOPDAEMON) {
2365                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2366
2367                 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2368                         ret = -ERESTARTSYS;
2369                         goto out_dec;
2370                 }
2371                 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2372                         ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2373                                                 dm->syncid);
2374                 else
2375                         ret = stop_sync_thread(net, dm->state);
2376                 mutex_unlock(&ipvs->sync_mutex);
2377                 goto out_dec;
2378         }
2379
2380         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2381                 ret = -ERESTARTSYS;
2382                 goto out_dec;
2383         }
2384
2385         if (cmd == IP_VS_SO_SET_FLUSH) {
2386                 /* Flush the virtual service */
2387                 ret = ip_vs_flush(net);
2388                 goto out_unlock;
2389         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2390                 /* Set timeout values for (tcp tcpfin udp) */
2391                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2392                 goto out_unlock;
2393         }
2394
2395         usvc_compat = (struct ip_vs_service_user *)arg;
2396         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2397
2398         /* We only use the new structs internally, so copy userspace compat
2399          * structs to extended internal versions */
2400         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2401         ip_vs_copy_udest_compat(&udest, udest_compat);
2402
2403         if (cmd == IP_VS_SO_SET_ZERO) {
2404                 /* if no service address is set, zero counters in all */
2405                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2406                         ret = ip_vs_zero_all(net);
2407                         goto out_unlock;
2408                 }
2409         }
2410
2411         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2412         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2413             usvc.protocol != IPPROTO_SCTP) {
2414                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2415                        usvc.protocol, &usvc.addr.ip,
2416                        ntohs(usvc.port), usvc.sched_name);
2417                 ret = -EFAULT;
2418                 goto out_unlock;
2419         }
2420
2421         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2422         if (usvc.fwmark == 0)
2423                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2424                                            &usvc.addr, usvc.port);
2425         else
2426                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2427
2428         if (cmd != IP_VS_SO_SET_ADD
2429             && (svc == NULL || svc->protocol != usvc.protocol)) {
2430                 ret = -ESRCH;
2431                 goto out_unlock;
2432         }
2433
2434         switch (cmd) {
2435         case IP_VS_SO_SET_ADD:
2436                 if (svc != NULL)
2437                         ret = -EEXIST;
2438                 else
2439                         ret = ip_vs_add_service(net, &usvc, &svc);
2440                 break;
2441         case IP_VS_SO_SET_EDIT:
2442                 ret = ip_vs_edit_service(svc, &usvc);
2443                 break;
2444         case IP_VS_SO_SET_DEL:
2445                 ret = ip_vs_del_service(svc);
2446                 if (!ret)
2447                         goto out_unlock;
2448                 break;
2449         case IP_VS_SO_SET_ZERO:
2450                 ret = ip_vs_zero_service(svc);
2451                 break;
2452         case IP_VS_SO_SET_ADDDEST:
2453                 ret = ip_vs_add_dest(svc, &udest);
2454                 break;
2455         case IP_VS_SO_SET_EDITDEST:
2456                 ret = ip_vs_edit_dest(svc, &udest);
2457                 break;
2458         case IP_VS_SO_SET_DELDEST:
2459                 ret = ip_vs_del_dest(svc, &udest);
2460                 break;
2461         default:
2462                 ret = -EINVAL;
2463         }
2464
2465   out_unlock:
2466         mutex_unlock(&__ip_vs_mutex);
2467   out_dec:
2468         /* decrease the module use count */
2469         ip_vs_use_count_dec();
2470
2471         return ret;
2472 }
2473
2474
2475 static void
2476 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2477 {
2478         dst->protocol = src->protocol;
2479         dst->addr = src->addr.ip;
2480         dst->port = src->port;
2481         dst->fwmark = src->fwmark;
2482         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2483         dst->flags = src->flags;
2484         dst->timeout = src->timeout / HZ;
2485         dst->netmask = src->netmask;
2486         dst->num_dests = src->num_dests;
2487         ip_vs_copy_stats(&dst->stats, &src->stats);
2488 }
2489
2490 static inline int
2491 __ip_vs_get_service_entries(struct net *net,
2492                             const struct ip_vs_get_services *get,
2493                             struct ip_vs_get_services __user *uptr)
2494 {
2495         int idx, count=0;
2496         struct ip_vs_service *svc;
2497         struct ip_vs_service_entry entry;
2498         int ret = 0;
2499
2500         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2501                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2502                         /* Only expose IPv4 entries to old interface */
2503                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2504                                 continue;
2505
2506                         if (count >= get->num_services)
2507                                 goto out;
2508                         memset(&entry, 0, sizeof(entry));
2509                         ip_vs_copy_service(&entry, svc);
2510                         if (copy_to_user(&uptr->entrytable[count],
2511                                          &entry, sizeof(entry))) {
2512                                 ret = -EFAULT;
2513                                 goto out;
2514                         }
2515                         count++;
2516                 }
2517         }
2518
2519         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2520                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2521                         /* Only expose IPv4 entries to old interface */
2522                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2523                                 continue;
2524
2525                         if (count >= get->num_services)
2526                                 goto out;
2527                         memset(&entry, 0, sizeof(entry));
2528                         ip_vs_copy_service(&entry, svc);
2529                         if (copy_to_user(&uptr->entrytable[count],
2530                                          &entry, sizeof(entry))) {
2531                                 ret = -EFAULT;
2532                                 goto out;
2533                         }
2534                         count++;
2535                 }
2536         }
2537 out:
2538         return ret;
2539 }
2540
2541 static inline int
2542 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2543                          struct ip_vs_get_dests __user *uptr)
2544 {
2545         struct ip_vs_service *svc;
2546         union nf_inet_addr addr = { .ip = get->addr };
2547         int ret = 0;
2548
2549         if (get->fwmark)
2550                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2551         else
2552                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2553                                            get->port);
2554
2555         if (svc) {
2556                 int count = 0;
2557                 struct ip_vs_dest *dest;
2558                 struct ip_vs_dest_entry entry;
2559
2560                 list_for_each_entry(dest, &svc->destinations, n_list) {
2561                         if (count >= get->num_dests)
2562                                 break;
2563
2564                         entry.addr = dest->addr.ip;
2565                         entry.port = dest->port;
2566                         entry.conn_flags = atomic_read(&dest->conn_flags);
2567                         entry.weight = atomic_read(&dest->weight);
2568                         entry.u_threshold = dest->u_threshold;
2569                         entry.l_threshold = dest->l_threshold;
2570                         entry.activeconns = atomic_read(&dest->activeconns);
2571                         entry.inactconns = atomic_read(&dest->inactconns);
2572                         entry.persistconns = atomic_read(&dest->persistconns);
2573                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2574                         if (copy_to_user(&uptr->entrytable[count],
2575                                          &entry, sizeof(entry))) {
2576                                 ret = -EFAULT;
2577                                 break;
2578                         }
2579                         count++;
2580                 }
2581         } else
2582                 ret = -ESRCH;
2583         return ret;
2584 }
2585
2586 static inline void
2587 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2588 {
2589 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2590         struct ip_vs_proto_data *pd;
2591 #endif
2592
2593 #ifdef CONFIG_IP_VS_PROTO_TCP
2594         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2595         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2596         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2597 #endif
2598 #ifdef CONFIG_IP_VS_PROTO_UDP
2599         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2600         u->udp_timeout =
2601                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2602 #endif
2603 }
2604
2605
/*
 * Index of a getsockopt command relative to IP_VS_BASE_CTL.  The argument
 * is parenthesised so that an expression argument (e.g. a conditional) is
 * evaluated as a whole before the subtraction.
 */
#define GET_CMDID(cmd)          ((cmd) - IP_VS_BASE_CTL)
/* Size of the fixed request header each GET command reads from user space */
#define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
/* Two daemon records are exchanged at once */
#define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2613
/*
 * Minimum user buffer length for each getsockopt command, indexed by
 * GET_CMDID(cmd).  do_ip_vs_get_ctl() rejects shorter buffers and copies
 * exactly this many bytes of the request from user space.  Commands without
 * an entry here get length 0.
 */
static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
        [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
        [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
};
2623
2624 static int
2625 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2626 {
2627         unsigned char arg[128];
2628         int ret = 0;
2629         unsigned int copylen;
2630         struct net *net = sock_net(sk);
2631         struct netns_ipvs *ipvs = net_ipvs(net);
2632
2633         BUG_ON(!net);
2634         if (!capable(CAP_NET_ADMIN))
2635                 return -EPERM;
2636
2637         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2638                 return -EINVAL;
2639
2640         if (*len < get_arglen[GET_CMDID(cmd)]) {
2641                 pr_err("get_ctl: len %u < %u\n",
2642                        *len, get_arglen[GET_CMDID(cmd)]);
2643                 return -EINVAL;
2644         }
2645
2646         copylen = get_arglen[GET_CMDID(cmd)];
2647         if (copylen > 128)
2648                 return -EINVAL;
2649
2650         if (copy_from_user(arg, user, copylen) != 0)
2651                 return -EFAULT;
2652         /*
2653          * Handle daemons first since it has its own locking
2654          */
2655         if (cmd == IP_VS_SO_GET_DAEMON) {
2656                 struct ip_vs_daemon_user d[2];
2657
2658                 memset(&d, 0, sizeof(d));
2659                 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2660                         return -ERESTARTSYS;
2661
2662                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2663                         d[0].state = IP_VS_STATE_MASTER;
2664                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2665                                 sizeof(d[0].mcast_ifn));
2666                         d[0].syncid = ipvs->master_syncid;
2667                 }
2668                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2669                         d[1].state = IP_VS_STATE_BACKUP;
2670                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2671                                 sizeof(d[1].mcast_ifn));
2672                         d[1].syncid = ipvs->backup_syncid;
2673                 }
2674                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2675                         ret = -EFAULT;
2676                 mutex_unlock(&ipvs->sync_mutex);
2677                 return ret;
2678         }
2679
2680         if (mutex_lock_interruptible(&__ip_vs_mutex))
2681                 return -ERESTARTSYS;
2682
2683         switch (cmd) {
2684         case IP_VS_SO_GET_VERSION:
2685         {
2686                 char buf[64];
2687
2688                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2689                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2690                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2691                         ret = -EFAULT;
2692                         goto out;
2693                 }
2694                 *len = strlen(buf)+1;
2695         }
2696         break;
2697
2698         case IP_VS_SO_GET_INFO:
2699         {
2700                 struct ip_vs_getinfo info;
2701                 info.version = IP_VS_VERSION_CODE;
2702                 info.size = ip_vs_conn_tab_size;
2703                 info.num_services = ipvs->num_services;
2704                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2705                         ret = -EFAULT;
2706         }
2707         break;
2708
2709         case IP_VS_SO_GET_SERVICES:
2710         {
2711                 struct ip_vs_get_services *get;
2712                 int size;
2713
2714                 get = (struct ip_vs_get_services *)arg;
2715                 size = sizeof(*get) +
2716                         sizeof(struct ip_vs_service_entry) * get->num_services;
2717                 if (*len != size) {
2718                         pr_err("length: %u != %u\n", *len, size);
2719                         ret = -EINVAL;
2720                         goto out;
2721                 }
2722                 ret = __ip_vs_get_service_entries(net, get, user);
2723         }
2724         break;
2725
2726         case IP_VS_SO_GET_SERVICE:
2727         {
2728                 struct ip_vs_service_entry *entry;
2729                 struct ip_vs_service *svc;
2730                 union nf_inet_addr addr;
2731
2732                 entry = (struct ip_vs_service_entry *)arg;
2733                 addr.ip = entry->addr;
2734                 if (entry->fwmark)
2735                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2736                 else
2737                         svc = __ip_vs_service_find(net, AF_INET,
2738                                                    entry->protocol, &addr,
2739                                                    entry->port);
2740                 if (svc) {
2741                         ip_vs_copy_service(entry, svc);
2742                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2743                                 ret = -EFAULT;
2744                 } else
2745                         ret = -ESRCH;
2746         }
2747         break;
2748
2749         case IP_VS_SO_GET_DESTS:
2750         {
2751                 struct ip_vs_get_dests *get;
2752                 int size;
2753
2754                 get = (struct ip_vs_get_dests *)arg;
2755                 size = sizeof(*get) +
2756                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2757                 if (*len != size) {
2758                         pr_err("length: %u != %u\n", *len, size);
2759                         ret = -EINVAL;
2760                         goto out;
2761                 }
2762                 ret = __ip_vs_get_dest_entries(net, get, user);
2763         }
2764         break;
2765
2766         case IP_VS_SO_GET_TIMEOUT:
2767         {
2768                 struct ip_vs_timeout_user t;
2769
2770                 memset(&t, 0, sizeof(t));
2771                 __ip_vs_get_timeouts(net, &t);
2772                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2773                         ret = -EFAULT;
2774         }
2775         break;
2776
2777         default:
2778                 ret = -EINVAL;
2779         }
2780
2781 out:
2782         mutex_unlock(&__ip_vs_mutex);
2783         return ret;
2784 }
2785
2786
/*
 * Socket option registration for the IPVS control interface on PF_INET
 * sockets: do_ip_vs_set_ctl() handles the setsockopt range and
 * do_ip_vs_get_ctl() the getsockopt range.  Both ranges start at
 * IP_VS_BASE_CTL; each optmax is one past the highest command of its kind.
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
        .pf             = PF_INET,
        .set_optmin     = IP_VS_BASE_CTL,
        .set_optmax     = IP_VS_SO_SET_MAX+1,
        .set            = do_ip_vs_set_ctl,
        .get_optmin     = IP_VS_BASE_CTL,
        .get_optmax     = IP_VS_SO_GET_MAX+1,
        .get            = do_ip_vs_get_ctl,
        .owner          = THIS_MODULE,
};
2797
2798 /*
2799  * Generic Netlink interface
2800  */
2801
2802 /* IPVS genetlink family */
2803 static struct genl_family ip_vs_genl_family = {
2804         .id             = GENL_ID_GENERATE,
2805         .hdrsize        = 0,
2806         .name           = IPVS_GENL_NAME,
2807         .version        = IPVS_GENL_VERSION,
2808         .maxattr        = IPVS_CMD_MAX,
2809         .netnsok        = true,         /* Make ipvsadm to work on netns */
2810 };
2811
/*
 * Policy used for first-level command attributes: the service, destination
 * and daemon descriptions are nested attribute sets (each with its own
 * policy table), the three timeouts are plain 32-bit values.
 */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
        [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
        [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
        [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
        [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
        [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
        [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
};
2821
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON:
 * daemon state and sync id are 32-bit values, the multicast interface name
 * is a NUL-terminated string limited by IP_VS_IFNAME_MAXLEN.
 */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
        [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
        [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
                                            .len = IP_VS_IFNAME_MAXLEN },
        [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
};
2829
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE.
 * The address and the flags are carried as binary blobs whose length is
 * limited to union nf_inet_addr and struct ip_vs_flags respectively; the
 * scheduler and persistence-engine names are length-limited NUL-terminated
 * strings.
 */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
        [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
        [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
        [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
                                            .len = sizeof(union nf_inet_addr) },
        [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
        [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
        [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
                                            .len = IP_VS_SCHEDNAME_MAXLEN },
        [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
                                            .len = IP_VS_PENAME_MAXLEN },
        [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
                                            .len = sizeof(struct ip_vs_flags) },
        [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
        [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
        [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
};
2848
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST.
 * The address is a binary blob limited to union nf_inet_addr, the port is
 * 16 bits wide, the statistics are a nested set and everything else is a
 * 32-bit value.
 */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
        [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
                                            .len = sizeof(union nf_inet_addr) },
        [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
        [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
        [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
};
2863
2864 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2865                                  struct ip_vs_stats *stats)
2866 {
2867         struct ip_vs_stats_user ustats;
2868         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2869         if (!nl_stats)
2870                 return -EMSGSIZE;
2871
2872         ip_vs_copy_stats(&ustats, stats);
2873
2874         if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2875             nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2876             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2877             nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2878             nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2879             nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2880             nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2881             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2882             nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2883             nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2884                 goto nla_put_failure;
2885         nla_nest_end(skb, nl_stats);
2886
2887         return 0;
2888
2889 nla_put_failure:
2890         nla_nest_cancel(skb, nl_stats);
2891         return -EMSGSIZE;
2892 }
2893
2894 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2895                                    struct ip_vs_service *svc)
2896 {
2897         struct nlattr *nl_service;
2898         struct ip_vs_flags flags = { .flags = svc->flags,
2899                                      .mask = ~0 };
2900
2901         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2902         if (!nl_service)
2903                 return -EMSGSIZE;
2904
2905         if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2906                 goto nla_put_failure;
2907         if (svc->fwmark) {
2908                 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2909                         goto nla_put_failure;
2910         } else {
2911                 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2912                     nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2913                     nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2914                         goto nla_put_failure;
2915         }
2916
2917         if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2918             (svc->pe &&
2919              nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2920             nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2921             nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2922             nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2923                 goto nla_put_failure;
2924         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2925                 goto nla_put_failure;
2926
2927         nla_nest_end(skb, nl_service);
2928
2929         return 0;
2930
2931 nla_put_failure:
2932         nla_nest_cancel(skb, nl_service);
2933         return -EMSGSIZE;
2934 }
2935
2936 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2937                                    struct ip_vs_service *svc,
2938                                    struct netlink_callback *cb)
2939 {
2940         void *hdr;
2941
2942         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2943                           &ip_vs_genl_family, NLM_F_MULTI,
2944                           IPVS_CMD_NEW_SERVICE);
2945         if (!hdr)
2946                 return -EMSGSIZE;
2947
2948         if (ip_vs_genl_fill_service(skb, svc) < 0)
2949                 goto nla_put_failure;
2950
2951         return genlmsg_end(skb, hdr);
2952
2953 nla_put_failure:
2954         genlmsg_cancel(skb, hdr);
2955         return -EMSGSIZE;
2956 }
2957
2958 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2959                                     struct netlink_callback *cb)
2960 {
2961         int idx = 0, i;
2962         int start = cb->args[0];
2963         struct ip_vs_service *svc;
2964         struct net *net = skb_sknet(skb);
2965
2966         mutex_lock(&__ip_vs_mutex);
2967         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2968                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2969                         if (++idx <= start || !net_eq(svc->net, net))
2970                                 continue;
2971                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2972                                 idx--;
2973                                 goto nla_put_failure;
2974                         }
2975                 }
2976         }
2977
2978         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2979                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2980                         if (++idx <= start || !net_eq(svc->net, net))
2981                                 continue;
2982                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2983                                 idx--;
2984                                 goto nla_put_failure;
2985                         }
2986                 }
2987         }
2988
2989 nla_put_failure:
2990         mutex_unlock(&__ip_vs_mutex);
2991         cb->args[0] = idx;
2992
2993         return skb->len;
2994 }
2995
2996 static int ip_vs_genl_parse_service(struct net *net,
2997                                     struct ip_vs_service_user_kern *usvc,
2998                                     struct nlattr *nla, int full_entry,
2999                                     struct ip_vs_service **ret_svc)
3000 {
3001         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3002         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3003         struct ip_vs_service *svc;
3004
3005         /* Parse mandatory identifying service fields first */
3006         if (nla == NULL ||
3007             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3008                 return -EINVAL;
3009
3010         nla_af          = attrs[IPVS_SVC_ATTR_AF];
3011         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3012         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3013         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3014         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3015
3016         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3017                 return -EINVAL;
3018
3019         memset(usvc, 0, sizeof(*usvc));
3020
3021         usvc->af = nla_get_u16(nla_af);
3022 #ifdef CONFIG_IP_VS_IPV6
3023         if (usvc->af != AF_INET && usvc->af != AF_INET6)
3024 #else
3025         if (usvc->af != AF_INET)
3026 #endif
3027                 return -EAFNOSUPPORT;
3028
3029         if (nla_fwmark) {
3030                 usvc->protocol = IPPROTO_TCP;
3031                 usvc->fwmark = nla_get_u32(nla_fwmark);
3032         } else {
3033                 usvc->protocol = nla_get_u16(nla_protocol);
3034                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3035                 usvc->port = nla_get_u16(nla_port);
3036                 usvc->fwmark = 0;
3037         }
3038
3039         if (usvc->fwmark)
3040                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3041         else
3042                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3043                                            &usvc->addr, usvc->port);
3044         *ret_svc = svc;
3045
3046         /* If a full entry was requested, check for the additional fields */
3047         if (full_entry) {
3048                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3049                               *nla_netmask;
3050                 struct ip_vs_flags flags;
3051
3052                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3053                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3054                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3055                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3056                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3057
3058                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3059                         return -EINVAL;
3060
3061                 nla_memcpy(&flags, nla_flags, sizeof(flags));
3062
3063                 /* prefill flags from service if it already exists */
3064                 if (svc)
3065                         usvc->flags = svc->flags;
3066
3067                 /* set new flags from userland */
3068                 usvc->flags = (usvc->flags & ~flags.mask) |
3069                               (flags.flags & flags.mask);
3070                 usvc->sched_name = nla_data(nla_sched);
3071                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3072                 usvc->timeout = nla_get_u32(nla_timeout);
3073                 usvc->netmask = nla_get_u32(nla_netmask);
3074         }
3075
3076         return 0;
3077 }
3078
3079 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3080                                                      struct nlattr *nla)
3081 {
3082         struct ip_vs_service_user_kern usvc;
3083         struct ip_vs_service *svc;
3084         int ret;
3085
3086         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3087         return ret ? ERR_PTR(ret) : svc;
3088 }
3089
3090 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3091 {
3092         struct nlattr *nl_dest;
3093
3094         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3095         if (!nl_dest)
3096                 return -EMSGSIZE;
3097
3098         if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3099             nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3100             nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3101                         (atomic_read(&dest->conn_flags) &
3102                          IP_VS_CONN_F_FWD_MASK)) ||
3103             nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3104                         atomic_read(&dest->weight)) ||
3105             nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3106             nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3107             nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3108                         atomic_read(&dest->activeconns)) ||
3109             nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3110                         atomic_read(&dest->inactconns)) ||
3111             nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3112                         atomic_read(&dest->persistconns)))
3113                 goto nla_put_failure;
3114         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3115                 goto nla_put_failure;
3116
3117         nla_nest_end(skb, nl_dest);
3118
3119         return 0;
3120
3121 nla_put_failure:
3122         nla_nest_cancel(skb, nl_dest);
3123         return -EMSGSIZE;
3124 }
3125
3126 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3127                                 struct netlink_callback *cb)
3128 {
3129         void *hdr;
3130
3131         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3132                           &ip_vs_genl_family, NLM_F_MULTI,
3133                           IPVS_CMD_NEW_DEST);
3134         if (!hdr)
3135                 return -EMSGSIZE;
3136
3137         if (ip_vs_genl_fill_dest(skb, dest) < 0)
3138                 goto nla_put_failure;
3139
3140         return genlmsg_end(skb, hdr);
3141
3142 nla_put_failure:
3143         genlmsg_cancel(skb, hdr);
3144         return -EMSGSIZE;
3145 }
3146
3147 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3148                                  struct netlink_callback *cb)
3149 {
3150         int idx = 0;
3151         int start = cb->args[0];
3152         struct ip_vs_service *svc;
3153         struct ip_vs_dest *dest;
3154         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3155         struct net *net = skb_sknet(skb);
3156
3157         mutex_lock(&__ip_vs_mutex);
3158
3159         /* Try to find the service for which to dump destinations */
3160         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3161                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3162                 goto out_err;
3163
3164
3165         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3166         if (IS_ERR(svc) || svc == NULL)
3167                 goto out_err;
3168
3169         /* Dump the destinations */
3170         list_for_each_entry(dest, &svc->destinations, n_list) {
3171                 if (++idx <= start)
3172                         continue;
3173                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3174                         idx--;
3175                         goto nla_put_failure;
3176                 }
3177         }
3178
3179 nla_put_failure:
3180         cb->args[0] = idx;
3181
3182 out_err:
3183         mutex_unlock(&__ip_vs_mutex);
3184
3185         return skb->len;
3186 }
3187
3188 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3189                                  struct nlattr *nla, int full_entry)
3190 {
3191         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3192         struct nlattr *nla_addr, *nla_port;
3193
3194         /* Parse mandatory identifying destination fields first */
3195         if (nla == NULL ||
3196             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3197                 return -EINVAL;
3198
3199         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3200         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3201
3202         if (!(nla_addr && nla_port))
3203                 return -EINVAL;
3204
3205         memset(udest, 0, sizeof(*udest));
3206
3207         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3208         udest->port = nla_get_u16(nla_port);
3209
3210         /* If a full entry was requested, check for the additional fields */
3211         if (full_entry) {
3212                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3213                               *nla_l_thresh;
3214
3215                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3216                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3217                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3218                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3219
3220                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3221                         return -EINVAL;
3222
3223                 udest->conn_flags = nla_get_u32(nla_fwd)
3224                                     & IP_VS_CONN_F_FWD_MASK;
3225                 udest->weight = nla_get_u32(nla_weight);
3226                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3227                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3228         }
3229
3230         return 0;
3231 }
3232
3233 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3234                                   const char *mcast_ifn, __be32 syncid)
3235 {
3236         struct nlattr *nl_daemon;
3237
3238         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3239         if (!nl_daemon)
3240                 return -EMSGSIZE;
3241
3242         if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3243             nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3244             nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3245                 goto nla_put_failure;
3246         nla_nest_end(skb, nl_daemon);
3247
3248         return 0;
3249
3250 nla_put_failure:
3251         nla_nest_cancel(skb, nl_daemon);
3252         return -EMSGSIZE;
3253 }
3254
3255 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3256                                   const char *mcast_ifn, __be32 syncid,
3257                                   struct netlink_callback *cb)
3258 {
3259         void *hdr;
3260         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3261                           &ip_vs_genl_family, NLM_F_MULTI,
3262                           IPVS_CMD_NEW_DAEMON);
3263         if (!hdr)
3264                 return -EMSGSIZE;
3265
3266         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3267                 goto nla_put_failure;
3268
3269         return genlmsg_end(skb, hdr);
3270
3271 nla_put_failure:
3272         genlmsg_cancel(skb, hdr);
3273         return -EMSGSIZE;
3274 }
3275
3276 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3277                                    struct netlink_callback *cb)
3278 {
3279         struct net *net = skb_sknet(skb);
3280         struct netns_ipvs *ipvs = net_ipvs(net);
3281
3282         mutex_lock(&ipvs->sync_mutex);
3283         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3284                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3285                                            ipvs->master_mcast_ifn,
3286                                            ipvs->master_syncid, cb) < 0)
3287                         goto nla_put_failure;
3288
3289                 cb->args[0] = 1;
3290         }
3291
3292         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3293                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3294                                            ipvs->backup_mcast_ifn,
3295                                            ipvs->backup_syncid, cb) < 0)
3296                         goto nla_put_failure;
3297
3298                 cb->args[1] = 1;
3299         }
3300
3301 nla_put_failure:
3302         mutex_unlock(&ipvs->sync_mutex);
3303
3304         return skb->len;
3305 }
3306
3307 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3308 {
3309         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3310               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3311               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3312                 return -EINVAL;
3313
3314         return start_sync_thread(net,
3315                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3316                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3317                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3318 }
3319
3320 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3321 {
3322         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3323                 return -EINVAL;
3324
3325         return stop_sync_thread(net,
3326                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3327 }
3328
3329 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3330 {
3331         struct ip_vs_timeout_user t;
3332
3333         __ip_vs_get_timeouts(net, &t);
3334
3335         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3336                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3337
3338         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3339                 t.tcp_fin_timeout =
3340                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3341
3342         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3343                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3344
3345         return ip_vs_set_timeout(net, &t);
3346 }
3347
3348 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3349 {
3350         int ret = 0, cmd;
3351         struct net *net;
3352         struct netns_ipvs *ipvs;
3353
3354         net = skb_sknet(skb);
3355         ipvs = net_ipvs(net);
3356         cmd = info->genlhdr->cmd;
3357
3358         if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3359                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3360
3361                 mutex_lock(&ipvs->sync_mutex);
3362                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3363                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3364                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3365                                      ip_vs_daemon_policy)) {
3366                         ret = -EINVAL;
3367                         goto out;
3368                 }
3369
3370                 if (cmd == IPVS_CMD_NEW_DAEMON)
3371                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3372                 else
3373                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3374 out:
3375                 mutex_unlock(&ipvs->sync_mutex);
3376         }
3377         return ret;
3378 }
3379
3380 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3381 {
3382         struct ip_vs_service *svc = NULL;
3383         struct ip_vs_service_user_kern usvc;
3384         struct ip_vs_dest_user_kern udest;
3385         int ret = 0, cmd;
3386         int need_full_svc = 0, need_full_dest = 0;
3387         struct net *net;
3388
3389         net = skb_sknet(skb);
3390         cmd = info->genlhdr->cmd;
3391
3392         mutex_lock(&__ip_vs_mutex);
3393
3394         if (cmd == IPVS_CMD_FLUSH) {
3395                 ret = ip_vs_flush(net);
3396                 goto out;
3397         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3398                 ret = ip_vs_genl_set_config(net, info->attrs);
3399                 goto out;
3400         } else if (cmd == IPVS_CMD_ZERO &&
3401                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3402                 ret = ip_vs_zero_all(net);
3403                 goto out;
3404         }
3405
3406         /* All following commands require a service argument, so check if we
3407          * received a valid one. We need a full service specification when
3408          * adding / editing a service. Only identifying members otherwise. */
3409         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3410                 need_full_svc = 1;
3411
3412         ret = ip_vs_genl_parse_service(net, &usvc,
3413                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3414                                        need_full_svc, &svc);
3415         if (ret)
3416                 goto out;
3417
3418         /* Unless we're adding a new service, the service must already exist */
3419         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3420                 ret = -ESRCH;
3421                 goto out;
3422         }
3423
3424         /* Destination commands require a valid destination argument. For
3425          * adding / editing a destination, we need a full destination
3426          * specification. */
3427         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3428             cmd == IPVS_CMD_DEL_DEST) {
3429                 if (cmd != IPVS_CMD_DEL_DEST)
3430                         need_full_dest = 1;
3431
3432                 ret = ip_vs_genl_parse_dest(&udest,
3433                                             info->attrs[IPVS_CMD_ATTR_DEST],
3434                                             need_full_dest);
3435                 if (ret)
3436                         goto out;
3437         }
3438
3439         switch (cmd) {
3440         case IPVS_CMD_NEW_SERVICE:
3441                 if (svc == NULL)
3442                         ret = ip_vs_add_service(net, &usvc, &svc);
3443                 else
3444                         ret = -EEXIST;
3445                 break;
3446         case IPVS_CMD_SET_SERVICE:
3447                 ret = ip_vs_edit_service(svc, &usvc);
3448                 break;
3449         case IPVS_CMD_DEL_SERVICE:
3450                 ret = ip_vs_del_service(svc);
3451                 /* do not use svc, it can be freed */
3452                 break;
3453         case IPVS_CMD_NEW_DEST:
3454                 ret = ip_vs_add_dest(svc, &udest);
3455                 break;
3456         case IPVS_CMD_SET_DEST:
3457                 ret = ip_vs_edit_dest(svc, &udest);
3458                 break;
3459         case IPVS_CMD_DEL_DEST:
3460                 ret = ip_vs_del_dest(svc, &udest);
3461                 break;
3462         case IPVS_CMD_ZERO:
3463                 ret = ip_vs_zero_service(svc);
3464                 break;
3465         default:
3466                 ret = -EINVAL;
3467         }
3468
3469 out:
3470         mutex_unlock(&__ip_vs_mutex);
3471
3472         return ret;
3473 }
3474
3475 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3476 {
3477         struct sk_buff *msg;
3478         void *reply;
3479         int ret, cmd, reply_cmd;
3480         struct net *net;
3481
3482         net = skb_sknet(skb);
3483         cmd = info->genlhdr->cmd;
3484
3485         if (cmd == IPVS_CMD_GET_SERVICE)
3486                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3487         else if (cmd == IPVS_CMD_GET_INFO)
3488                 reply_cmd = IPVS_CMD_SET_INFO;
3489         else if (cmd == IPVS_CMD_GET_CONFIG)
3490                 reply_cmd = IPVS_CMD_SET_CONFIG;
3491         else {
3492                 pr_err("unknown Generic Netlink command\n");
3493                 return -EINVAL;
3494         }
3495
3496         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3497         if (!msg)
3498                 return -ENOMEM;
3499
3500         mutex_lock(&__ip_vs_mutex);
3501
3502         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3503         if (reply == NULL)
3504                 goto nla_put_failure;
3505
3506         switch (cmd) {
3507         case IPVS_CMD_GET_SERVICE:
3508         {
3509                 struct ip_vs_service *svc;
3510
3511                 svc = ip_vs_genl_find_service(net,
3512                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3513                 if (IS_ERR(svc)) {
3514                         ret = PTR_ERR(svc);
3515                         goto out_err;
3516                 } else if (svc) {
3517                         ret = ip_vs_genl_fill_service(msg, svc);
3518                         if (ret)
3519                                 goto nla_put_failure;
3520                 } else {
3521                         ret = -ESRCH;
3522                         goto out_err;
3523                 }
3524
3525                 break;
3526         }
3527
3528         case IPVS_CMD_GET_CONFIG:
3529         {
3530                 struct ip_vs_timeout_user t;
3531
3532                 __ip_vs_get_timeouts(net, &t);
3533 #ifdef CONFIG_IP_VS_PROTO_TCP
3534                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3535                                 t.tcp_timeout) ||
3536                     nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3537                                 t.tcp_fin_timeout))
3538                         goto nla_put_failure;
3539 #endif
3540 #ifdef CONFIG_IP_VS_PROTO_UDP
3541                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3542                         goto nla_put_failure;
3543 #endif
3544
3545                 break;
3546         }
3547
3548         case IPVS_CMD_GET_INFO:
3549                 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3550                                 IP_VS_VERSION_CODE) ||
3551                     nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3552                                 ip_vs_conn_tab_size))
3553                         goto nla_put_failure;
3554                 break;
3555         }
3556
3557         genlmsg_end(msg, reply);
3558         ret = genlmsg_reply(msg, info);
3559         goto out;
3560
3561 nla_put_failure:
3562         pr_err("not enough space in Netlink message\n");
3563         ret = -EMSGSIZE;
3564
3565 out_err:
3566         nlmsg_free(msg);
3567 out:
3568         mutex_unlock(&__ip_vs_mutex);
3569
3570         return ret;
3571 }
3572
3573
3574 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3575         {
3576                 .cmd    = IPVS_CMD_NEW_SERVICE,
3577                 .flags  = GENL_ADMIN_PERM,
3578                 .policy = ip_vs_cmd_policy,
3579                 .doit   = ip_vs_genl_set_cmd,
3580         },
3581         {
3582                 .cmd    = IPVS_CMD_SET_SERVICE,
3583                 .flags  = GENL_ADMIN_PERM,
3584                 .policy = ip_vs_cmd_policy,
3585                 .doit   = ip_vs_genl_set_cmd,
3586         },
3587         {
3588                 .cmd    = IPVS_CMD_DEL_SERVICE,
3589                 .flags  = GENL_ADMIN_PERM,
3590                 .policy = ip_vs_cmd_policy,
3591                 .doit   = ip_vs_genl_set_cmd,
3592         },
3593         {
3594                 .cmd    = IPVS_CMD_GET_SERVICE,
3595                 .flags  = GENL_ADMIN_PERM,
3596                 .doit   = ip_vs_genl_get_cmd,
3597                 .dumpit = ip_vs_genl_dump_services,
3598                 .policy = ip_vs_cmd_policy,
3599         },
3600         {
3601                 .cmd    = IPVS_CMD_NEW_DEST,
3602                 .flags  = GENL_ADMIN_PERM,
3603                 .policy = ip_vs_cmd_policy,
3604                 .doit   = ip_vs_genl_set_cmd,
3605         },
3606         {
3607                 .cmd    = IPVS_CMD_SET_DEST,
3608                 .flags  = GENL_ADMIN_PERM,
3609                 .policy = ip_vs_cmd_policy,
3610                 .doit   = ip_vs_genl_set_cmd,
3611         },
3612         {
3613                 .cmd    = IPVS_CMD_DEL_DEST,
3614                 .flags  = GENL_ADMIN_PERM,
3615                 .policy = ip_vs_cmd_policy,
3616                 .doit   = ip_vs_genl_set_cmd,
3617         },
3618         {
3619                 .cmd    = IPVS_CMD_GET_DEST,
3620                 .flags  = GENL_ADMIN_PERM,
3621                 .policy = ip_vs_cmd_policy,
3622                 .dumpit = ip_vs_genl_dump_dests,
3623         },
3624         {
3625                 .cmd    = IPVS_CMD_NEW_DAEMON,
3626                 .flags  = GENL_ADMIN_PERM,
3627                 .policy = ip_vs_cmd_policy,
3628                 .doit   = ip_vs_genl_set_daemon,
3629         },
3630         {
3631                 .cmd    = IPVS_CMD_DEL_DAEMON,
3632                 .flags  = GENL_ADMIN_PERM,
3633                 .policy = ip_vs_cmd_policy,
3634                 .doit   = ip_vs_genl_set_daemon,
3635         },
3636         {
3637                 .cmd    = IPVS_CMD_GET_DAEMON,
3638                 .flags  = GENL_ADMIN_PERM,
3639                 .dumpit = ip_vs_genl_dump_daemons,
3640         },
3641         {
3642                 .cmd    = IPVS_CMD_SET_CONFIG,
3643                 .flags  = GENL_ADMIN_PERM,
3644                 .policy = ip_vs_cmd_policy,
3645                 .doit   = ip_vs_genl_set_cmd,
3646         },
3647         {
3648                 .cmd    = IPVS_CMD_GET_CONFIG,
3649                 .flags  = GENL_ADMIN_PERM,
3650                 .doit   = ip_vs_genl_get_cmd,
3651         },
3652         {
3653                 .cmd    = IPVS_CMD_GET_INFO,
3654                 .flags  = GENL_ADMIN_PERM,
3655                 .doit   = ip_vs_genl_get_cmd,
3656         },
3657         {
3658                 .cmd    = IPVS_CMD_ZERO,
3659                 .flags  = GENL_ADMIN_PERM,
3660                 .policy = ip_vs_cmd_policy,
3661                 .doit   = ip_vs_genl_set_cmd,
3662         },
3663         {
3664                 .cmd    = IPVS_CMD_FLUSH,
3665                 .flags  = GENL_ADMIN_PERM,
3666                 .doit   = ip_vs_genl_set_cmd,
3667         },
3668 };
3669
3670 static int __init ip_vs_genl_register(void)
3671 {
3672         return genl_register_family_with_ops(&ip_vs_genl_family,
3673                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3674 }
3675
3676 static void ip_vs_genl_unregister(void)
3677 {
3678         genl_unregister_family(&ip_vs_genl_family);
3679 }
3680
3681 /* End of Generic Netlink interface definitions */
3682
/*
 * Per-netns init/exit functions.
 */
3686 #ifdef CONFIG_SYSCTL
3687 static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3688 {
3689         int idx;
3690         struct netns_ipvs *ipvs = net_ipvs(net);
3691         struct ctl_table *tbl;
3692
3693         atomic_set(&ipvs->dropentry, 0);
3694         spin_lock_init(&ipvs->dropentry_lock);
3695         spin_lock_init(&ipvs->droppacket_lock);
3696         spin_lock_init(&ipvs->securetcp_lock);
3697
3698         if (!net_eq(net, &init_net)) {
3699                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3700                 if (tbl == NULL)
3701                         return -ENOMEM;
3702         } else
3703                 tbl = vs_vars;
3704         /* Initialize sysctl defaults */
3705         idx = 0;
3706         ipvs->sysctl_amemthresh = 1024;
3707         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3708         ipvs->sysctl_am_droprate = 10;
3709         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3710         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3711         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3712 #ifdef CONFIG_IP_VS_NFCT
3713         tbl[idx++].data = &ipvs->sysctl_conntrack;
3714 #endif
3715         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3716         ipvs->sysctl_snat_reroute = 1;
3717         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3718         ipvs->sysctl_sync_ver = 1;
3719         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3720         ipvs->sysctl_sync_ports = 1;
3721         tbl[idx++].data = &ipvs->sysctl_sync_ports;
3722         ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3723         tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3724         ipvs->sysctl_sync_sock_size = 0;
3725         tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3726         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3727         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3728         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3729         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3730         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3731         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3732         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3733         ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3734         tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3735         ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3736         tbl[idx++].data = &ipvs->sysctl_sync_retries;
3737         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3738         ipvs->sysctl_pmtu_disc = 1;
3739         tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
3740
3741
3742         ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3743         if (ipvs->sysctl_hdr == NULL) {
3744                 if (!net_eq(net, &init_net))
3745                         kfree(tbl);
3746                 return -ENOMEM;
3747         }
3748         ip_vs_start_estimator(net, &ipvs->tot_stats);
3749         ipvs->sysctl_tbl = tbl;
3750         /* Schedule defense work */
3751         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3752         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3753
3754         return 0;
3755 }
3756
3757 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3758 {
3759         struct netns_ipvs *ipvs = net_ipvs(net);
3760
3761         cancel_delayed_work_sync(&ipvs->defense_work);
3762         cancel_work_sync(&ipvs->defense_work.work);
3763         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3764 }
3765
3766 #else
3767
3768 static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3769 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3770
3771 #endif
3772
3773 static struct notifier_block ip_vs_dst_notifier = {
3774         .notifier_call = ip_vs_dst_event,
3775 };
3776
3777 int __net_init ip_vs_control_net_init(struct net *net)
3778 {
3779         int idx;
3780         struct netns_ipvs *ipvs = net_ipvs(net);
3781
3782         rwlock_init(&ipvs->rs_lock);
3783
3784         /* Initialize rs_table */
3785         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3786                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3787
3788         INIT_LIST_HEAD(&ipvs->dest_trash);
3789         atomic_set(&ipvs->ftpsvc_counter, 0);
3790         atomic_set(&ipvs->nullsvc_counter, 0);
3791
3792         /* procfs stats */
3793         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3794         if (!ipvs->tot_stats.cpustats)
3795                 return -ENOMEM;
3796
3797         spin_lock_init(&ipvs->tot_stats.lock);
3798
3799         proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3800         proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3801         proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3802                              &ip_vs_stats_percpu_fops);
3803
3804         if (ip_vs_control_net_init_sysctl(net))
3805                 goto err;
3806
3807         return 0;
3808
3809 err:
3810         free_percpu(ipvs->tot_stats.cpustats);
3811         return -ENOMEM;
3812 }
3813
3814 void __net_exit ip_vs_control_net_cleanup(struct net *net)
3815 {
3816         struct netns_ipvs *ipvs = net_ipvs(net);
3817
3818         ip_vs_trash_cleanup(net);
3819         ip_vs_stop_estimator(net, &ipvs->tot_stats);
3820         ip_vs_control_net_cleanup_sysctl(net);
3821         proc_net_remove(net, "ip_vs_stats_percpu");
3822         proc_net_remove(net, "ip_vs_stats");
3823         proc_net_remove(net, "ip_vs");
3824         free_percpu(ipvs->tot_stats.cpustats);
3825 }
3826
3827 int __init ip_vs_register_nl_ioctl(void)
3828 {
3829         int ret;
3830
3831         ret = nf_register_sockopt(&ip_vs_sockopts);
3832         if (ret) {
3833                 pr_err("cannot register sockopt.\n");
3834                 goto err_sock;
3835         }
3836
3837         ret = ip_vs_genl_register();
3838         if (ret) {
3839                 pr_err("cannot register Generic Netlink interface.\n");
3840                 goto err_genl;
3841         }
3842         return 0;
3843
3844 err_genl:
3845         nf_unregister_sockopt(&ip_vs_sockopts);
3846 err_sock:
3847         return ret;
3848 }
3849
3850 void ip_vs_unregister_nl_ioctl(void)
3851 {
3852         ip_vs_genl_unregister();
3853         nf_unregister_sockopt(&ip_vs_sockopts);
3854 }
3855
3856 int __init ip_vs_control_init(void)
3857 {
3858         int idx;
3859         int ret;
3860
3861         EnterFunction(2);
3862
3863         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3864         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3865                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3866                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3867         }
3868
3869         smp_wmb();      /* Do we really need it now ? */
3870
3871         ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3872         if (ret < 0)
3873                 return ret;
3874
3875         LeaveFunction(2);
3876         return 0;
3877 }
3878
3879
3880 void ip_vs_control_cleanup(void)
3881 {
3882         EnterFunction(2);
3883         unregister_netdevice_notifier(&ip_vs_dst_notifier);
3884         LeaveFunction(2);
3885 }