multipath: handle offlined paths
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18 #include <linux/oom.h>
19
20 /*
21  * libcheckers
22  */
23 #include <checkers.h>
24
25 /*
26  * libmultipath
27  */
28 #include <parser.h>
29 #include <vector.h>
30 #include <memory.h>
31 #include <config.h>
32 #include <util.h>
33 #include <hwtable.h>
34 #include <defaults.h>
35 #include <structs.h>
36 #include <callout.h>
37 #include <blacklist.h>
38 #include <structs_vec.h>
39 #include <dmparser.h>
40 #include <devmapper.h>
41 #include <sysfs.h>
42 #include <dict.h>
43 #include <discovery.h>
44 #include <debug.h>
45 #include <propsel.h>
46 #include <uevent.h>
47 #include <switchgroup.h>
48 #include <print.h>
49 #include <configure.h>
50 #include <prio.h>
51 #include <pgpolicies.h>
52 #include <uevent.h>
53
54 #include "main.h"
55 #include "pidfile.h"
56 #include "uxlsnr.h"
57 #include "uxclnt.h"
58 #include "cli.h"
59 #include "cli_handlers.h"
60 #include "lock.h"
61 #include "waiter.h"
62
#define FILE_NAME_SIZE 256
#define CMDSIZE 160

/*
 * Log a checker message for the path held in the caller's local
 * variable `pp` (the macro does not take the path as an argument).
 * An offline path is always reported as such; otherwise message `b`
 * is logged at verbosity `a` only when it is a non-empty string.
 */
#define LOG_MSG(a, b) \
do { \
	if (pp->offline) \
		condlog((a), "%s: %s - path offline", \
			pp->mpp->alias, pp->dev); \
	else if (strlen((b))) \
		condlog((a), "%s: %s - %s", \
			pp->mpp->alias, pp->dev, (b)); \
} while (0)
73
74 pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
75 pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
76
77 int logsink;
78 enum daemon_status running_state;
79 pid_t daemon_pid;
80
81 /*
82  * global copy of vecs for use in sig handlers
83  */
84 struct vectors * gvecs;
85
86 static int
87 need_switch_pathgroup (struct multipath * mpp, int refresh)
88 {
89         struct pathgroup * pgp;
90         struct path * pp;
91         unsigned int i, j;
92
93         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
94                 return 0;
95
96         /*
97          * Refresh path priority values
98          */
99         if (refresh)
100                 vector_foreach_slot (mpp->pg, pgp, i)
101                         vector_foreach_slot (pgp->paths, pp, j)
102                                 pathinfo(pp, conf->hwtable, DI_PRIO);
103
104         mpp->bestpg = select_path_group(mpp);
105
106         if (mpp->bestpg != mpp->nextpg)
107                 return 1;
108
109         return 0;
110 }
111
112 static void
113 switch_pathgroup (struct multipath * mpp)
114 {
115         mpp->stat_switchgroup++;
116         dm_switchgroup(mpp->alias, mpp->bestpg);
117         condlog(2, "%s: switch to path group #%i",
118                  mpp->alias, mpp->bestpg);
119 }
120
121 static int
122 coalesce_maps(struct vectors *vecs, vector nmpv)
123 {
124         struct multipath * ompp;
125         vector ompv = vecs->mpvec;
126         unsigned int i;
127         int j;
128
129         vector_foreach_slot (ompv, ompp, i) {
130                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
131                         /*
132                          * remove all current maps not allowed by the
133                          * current configuration
134                          */
135                         if (dm_flush_map(ompp->alias)) {
136                                 condlog(0, "%s: unable to flush devmap",
137                                         ompp->alias);
138                                 /*
139                                  * may be just because the device is open
140                                  */
141                                 if (!vector_alloc_slot(nmpv))
142                                         return 1;
143
144                                 vector_set_slot(nmpv, ompp);
145                                 setup_multipath(vecs, ompp);
146
147                                 if ((j = find_slot(ompv, (void *)ompp)) != -1)
148                                         vector_del_slot(ompv, j);
149
150                                 continue;
151                         }
152                         else {
153                                 dm_lib_release();
154                                 condlog(2, "%s devmap removed", ompp->alias);
155                         }
156                 } else if (conf->reassign_maps) {
157                         condlog(3, "%s: Reassign existing device-mapper"
158                                 " devices", ompp->alias);
159                         dm_reassign(ompp->alias);
160                 }
161         }
162         return 0;
163 }
164
165 void
166 sync_map_state(struct multipath *mpp)
167 {
168         struct pathgroup *pgp;
169         struct path *pp;
170         unsigned int i, j;
171
172         if (!mpp->pg)
173                 return;
174
175         vector_foreach_slot (mpp->pg, pgp, i){
176                 vector_foreach_slot (pgp->paths, pp, j){
177                         if (pp->state == PATH_UNCHECKED || 
178                             pp->state == PATH_WILD)
179                                 continue;
180                         if ((pp->dmstate == PSTATE_FAILED ||
181                              pp->dmstate == PSTATE_UNDEF) &&
182                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
183                                 dm_reinstate_path(mpp->alias, pp->dev_t);
184                         else if ((pp->dmstate == PSTATE_ACTIVE ||
185                                   pp->dmstate == PSTATE_UNDEF) &&
186                                  (pp->state == PATH_DOWN ||
187                                   pp->state == PATH_SHAKY))
188                                 dm_fail_path(mpp->alias, pp->dev_t);
189                 }
190         }
191 }
192
193 static void
194 sync_maps_state(vector mpvec)
195 {
196         unsigned int i;
197         struct multipath *mpp;
198
199         vector_foreach_slot (mpvec, mpp, i)
200                 sync_map_state(mpp);
201 }
202
203 static int
204 flush_map(struct multipath * mpp, struct vectors * vecs)
205 {
206         /*
207          * clear references to this map before flushing so we can ignore
208          * the spurious uevent we may generate with the dm_flush_map call below
209          */
210         if (dm_flush_map(mpp->alias)) {
211                 /*
212                  * May not really be an error -- if the map was already flushed
213                  * from the device mapper by dmsetup(8) for instance.
214                  */
215                 condlog(0, "%s: can't flush", mpp->alias);
216                 return 1;
217         }
218         else {
219                 dm_lib_release();
220                 condlog(2, "%s: devmap removed", mpp->alias);
221         }
222
223         orphan_paths(vecs->pathvec, mpp);
224         remove_map_and_stop_waiter(mpp, vecs, 1);
225
226         return 0;
227 }
228
229 static int
230 uev_add_map (struct uevent * uev, struct vectors * vecs)
231 {
232         char *alias;
233         int major = -1, minor = -1, rc;
234
235         condlog(2, "%s: add map (uevent)", uev->kernel);
236         alias = uevent_get_dm_name(uev);
237         if (!alias) {
238                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
239                 major = uevent_get_major(uev);
240                 minor = uevent_get_minor(uev);
241                 alias = dm_mapname(major, minor);
242                 if (!alias) {
243                         condlog(2, "%s: mapname not found for %d:%d",
244                                 uev->kernel, major, minor);
245                         return 1;
246                 }
247         }
248         rc = ev_add_map(uev->kernel, alias, vecs);
249         FREE(alias);
250         return rc;
251 }
252
253 int
254 ev_add_map (char * dev, char * alias, struct vectors * vecs)
255 {
256         char * refwwid;
257         struct multipath * mpp;
258         int map_present;
259         int r = 1;
260
261         map_present = dm_map_present(alias);
262
263         if (map_present && dm_type(alias, TGT_MPATH) <= 0) {
264                 condlog(4, "%s: not a multipath map", alias);
265                 return 0;
266         }
267
268         mpp = find_mp_by_alias(vecs->mpvec, alias);
269
270         if (mpp) {
271                 /*
272                  * Not really an error -- we generate our own uevent
273                  * if we create a multipath mapped device as a result
274                  * of uev_add_path
275                  */
276                 if (conf->reassign_maps) {
277                         condlog(3, "%s: Reassign existing device-mapper devices",
278                                 alias);
279                         dm_reassign(alias);
280                 }
281                 return 0;
282         }
283
284         /*
285          * now we can register the map
286          */
287         if (map_present && (mpp = add_map_without_path(vecs, alias))) {
288                 sync_map_state(mpp);
289                 condlog(2, "%s: devmap %s registered", alias, dev);
290                 return 0;
291         }
292         refwwid = get_refwwid(dev, DEV_DEVMAP, vecs->pathvec);
293
294         if (refwwid) {
295                 r = coalesce_paths(vecs, NULL, refwwid, 0);
296                 dm_lib_release();
297         }
298
299         if (!r)
300                 condlog(2, "%s: devmap %s added", alias, dev);
301         else
302                 condlog(0, "%s: uev_add_map %s failed", alias, dev);
303
304         FREE(refwwid);
305         return r;
306 }
307
308 static int
309 uev_remove_map (struct uevent * uev, struct vectors * vecs)
310 {
311         char *alias;
312         int minor, rc;
313
314         condlog(2, "%s: remove map (uevent)", uev->kernel);
315         alias = uevent_get_dm_name(uev);
316         if (!alias) {
317                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
318                 return 0;
319         }
320         minor = uevent_get_minor(uev);
321         rc = ev_remove_map(uev->kernel, alias, minor, vecs);
322         FREE(alias);
323         return rc;
324 }
325
326 int
327 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
328 {
329         struct multipath * mpp;
330
331         mpp = find_mp_by_minor(vecs->mpvec, minor);
332
333         if (!mpp) {
334                 condlog(2, "%s: devmap not registered, can't remove",
335                         devname);
336                 return 0;
337         }
338         if (strcmp(mpp->alias, alias)) {
339                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
340                         mpp->alias, mpp->dmi->minor, minor);
341                 return 0;
342         }
343         return flush_map(mpp, vecs);
344 }
345
346 static int
347 uev_add_path (struct uevent *uev, struct vectors * vecs)
348 {
349         struct sysfs_device * dev;
350
351         dev = sysfs_device_get(uev->devpath);
352         if (!dev) {
353                 condlog(2, "%s: not found in sysfs", uev->devpath);
354                 return 1;
355         }
356         condlog(2, "%s: add path (uevent)", dev->kernel);
357         return (ev_add_path(dev->kernel, vecs) != 1)? 0 : 1;
358 }
359
360 /*
361  * returns:
362  * 0: added
363  * 1: error
364  * 2: blacklisted
365  */
366 int
367 ev_add_path (char * devname, struct vectors * vecs)
368 {
369         struct multipath * mpp;
370         struct path * pp;
371         char empty_buff[WWID_SIZE] = {0};
372         char params[PARAMS_SIZE] = {0};
373         int retries = 3;
374         int start_waiter = 0;
375
376         if (strstr(devname, "..") != NULL) {
377                 /*
378                  * Don't allow relative device names in the pathvec
379                  */
380                 condlog(0, "%s: path name is invalid", devname);
381                 return 1;
382         }
383
384         pp = find_path_by_dev(vecs->pathvec, devname);
385
386         if (pp) {
387                 condlog(0, "%s: spurious uevent, path already in pathvec",
388                         devname);
389                 if (pp->mpp)
390                         return 0;
391         }
392         else {
393                 /*
394                  * get path vital state
395                  */
396                 if (!(pp = store_pathinfo(vecs->pathvec, conf->hwtable,
397                       devname, DI_ALL))) {
398                         condlog(0, "%s: failed to store path info", devname);
399                         return 1;
400                 }
401                 pp->checkint = conf->checkint;
402         }
403
404         /*
405          * need path UID to go any further
406          */
407         if (memcmp(empty_buff, pp->wwid, WWID_SIZE) == 0) {
408                 condlog(0, "%s: failed to get path uid", devname);
409                 goto fail; /* leave path added to pathvec */
410         }
411         if (filter_path(conf, pp) > 0){
412                 int i = find_slot(vecs->pathvec, (void *)pp);
413                 if (i != -1)
414                         vector_del_slot(vecs->pathvec, i);
415                 free_path(pp);
416                 return 2;
417         }
418         mpp = pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
419 rescan:
420         if (mpp) {
421                 if ((!pp->size) || (mpp->size != pp->size)) {
422                         if (!pp->size)
423                                 condlog(0, "%s: failed to add new path %s, "
424                                         "device size is 0",
425                                         devname, pp->dev);
426                         else
427                                 condlog(0, "%s: failed to add new path %s, "
428                                         "device size mismatch",
429                                         devname, pp->dev);
430                         int i = find_slot(vecs->pathvec, (void *)pp);
431                         if (i != -1)
432                                 vector_del_slot(vecs->pathvec, i);
433                         free_path(pp);
434                         return 1;
435                 }
436
437                 condlog(4,"%s: adopting all paths for path %s",
438                         mpp->alias, pp->dev);
439                 if (adopt_paths(vecs->pathvec, mpp, 1))
440                         goto fail; /* leave path added to pathvec */
441
442                 verify_paths(mpp, vecs, NULL);
443                 mpp->flush_on_last_del = FLUSH_UNDEF;
444                 mpp->action = ACT_RELOAD;
445         }
446         else {
447                 if (!pp->size) {
448                         condlog(0, "%s: failed to create new map,"
449                                 " %s device size is 0 ", devname, pp->dev);
450                         int i = find_slot(vecs->pathvec, (void *)pp);
451                         if (i != -1)
452                                 vector_del_slot(vecs->pathvec, i);
453                         free_path(pp);
454                         return 1;
455                 }
456
457                 condlog(4,"%s: creating new map", pp->dev);
458                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
459                         mpp->action = ACT_CREATE;
460                         /*
461                          * We don't depend on ACT_CREATE, as domap will
462                          * set it to ACT_NOTHING when complete.
463                          */
464                         start_waiter = 1;
465                 }
466                 else
467                         goto fail; /* leave path added to pathvec */
468         }
469
470         /*
471          * push the map to the device-mapper
472          */
473         if (setup_map(mpp, params, PARAMS_SIZE)) {
474                 condlog(0, "%s: failed to setup map for addition of new "
475                         "path %s", mpp->alias, devname);
476                 goto fail_map;
477         }
478         /*
479          * reload the map for the multipath mapped device
480          */
481         if (domap(mpp, params) <= 0) {
482                 condlog(0, "%s: failed in domap for addition of new "
483                         "path %s", mpp->alias, devname);
484                 /*
485                  * deal with asynchronous uevents :((
486                  */
487                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
488                         condlog(0, "%s: uev_add_path sleep", mpp->alias);
489                         sleep(1);
490                         update_mpp_paths(mpp, vecs->pathvec);
491                         goto rescan;
492                 }
493                 else if (mpp->action == ACT_RELOAD)
494                         condlog(0, "%s: giving up reload", mpp->alias);
495                 else
496                         goto fail_map;
497         }
498         dm_lib_release();
499
500         /*
501          * update our state from kernel regardless of create or reload
502          */
503         if (setup_multipath(vecs, mpp))
504                 goto fail_map;
505
506         sync_map_state(mpp);
507
508         if ((mpp->action == ACT_CREATE ||
509              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
510             start_waiter_thread(mpp, vecs))
511                         goto fail_map;
512
513         if (retries >= 0) {
514                 condlog(2, "%s path added to devmap %s", devname, mpp->alias);
515                 return 0;
516         }
517         else
518                 return 1;
519
520 fail_map:
521         remove_map(mpp, vecs, 1);
522 fail:
523         orphan_path(pp);
524         return 1;
525 }
526
527 static int
528 uev_remove_path (struct uevent *uev, struct vectors * vecs)
529 {
530         struct sysfs_device * dev;
531         int retval;
532
533         dev = sysfs_device_get(uev->devpath);
534         if (!dev) {
535                 condlog(2, "%s: not found in sysfs", uev->devpath);
536                 return 1;
537         }
538         condlog(2, "%s: remove path (uevent)", uev->kernel);
539         retval = ev_remove_path(uev->kernel, vecs);
540
541         if (!retval)
542                 sysfs_device_put(dev);
543
544         return retval;
545 }
546
547 int
548 ev_remove_path (char * devname, struct vectors * vecs)
549 {
550         struct multipath * mpp;
551         struct path * pp;
552         int i, retval = 0;
553         char params[PARAMS_SIZE] = {0};
554
555         pp = find_path_by_dev(vecs->pathvec, devname);
556
557         if (!pp) {
558                 /* Not an error; path might have been purged earlier */
559                 condlog(0, "%s: path already removed", devname);
560                 return 0;
561         }
562
563         /*
564          * avoid referring to the map of an orphaned path
565          */
566         if ((mpp = pp->mpp)) {
567                 /*
568                  * transform the mp->pg vector of vectors of paths
569                  * into a mp->params string to feed the device-mapper
570                  */
571                 if (update_mpp_paths(mpp, vecs->pathvec)) {
572                         condlog(0, "%s: failed to update paths",
573                                 mpp->alias);
574                         goto fail;
575                 }
576                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
577                         vector_del_slot(mpp->paths, i);
578
579                 /*
580                  * remove the map IFF removing the last path
581                  */
582                 if (VECTOR_SIZE(mpp->paths) == 0) {
583                         char alias[WWID_SIZE];
584
585                         /*
586                          * flush_map will fail if the device is open
587                          */
588                         strncpy(alias, mpp->alias, WWID_SIZE);
589                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
590                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
591                                 mpp->retry_tick = 0;
592                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
593                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
594                                 dm_queue_if_no_path(mpp->alias, 0);
595                         }
596                         if (!flush_map(mpp, vecs)) {
597                                 condlog(2, "%s: removed map after"
598                                         " removing all paths",
599                                         alias);
600                                 retval = 0;
601                                 goto out;
602                         }
603                         /*
604                          * Not an error, continue
605                          */
606                 }
607
608                 if (setup_map(mpp, params, PARAMS_SIZE)) {
609                         condlog(0, "%s: failed to setup map for"
610                                 " removal of path %s", mpp->alias,
611                                 devname);
612                         goto fail;
613                 }
614                 /*
615                  * reload the map
616                  */
617                 mpp->action = ACT_RELOAD;
618                 if (domap(mpp, params) <= 0) {
619                         condlog(0, "%s: failed in domap for "
620                                 "removal of path %s",
621                                 mpp->alias, devname);
622                         retval = 1;
623                 } else {
624                         /*
625                          * update our state from kernel
626                          */
627                         if (setup_multipath(vecs, mpp)) {
628                                 goto fail;
629                         }
630                         sync_map_state(mpp);
631
632                         condlog(2, "%s: path removed from map %s",
633                                 devname, mpp->alias);
634                 }
635         }
636
637 out:
638         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
639                 vector_del_slot(vecs->pathvec, i);
640
641         free_path(pp);
642
643         return retval;
644
645 fail:
646         remove_map_and_stop_waiter(mpp, vecs, 1);
647         return 1;
648 }
649
650 static int
651 uev_update_path (struct uevent *uev, struct vectors * vecs)
652 {
653         struct sysfs_device * dev;
654         int retval, ro;
655
656         dev = sysfs_device_get(uev->devpath);
657         if (!dev) {
658                 condlog(2, "%s: not found in sysfs", uev->devpath);
659                 return 1;
660         }
661         ro = uevent_get_disk_ro(uev);
662
663         if (ro >= 0) {
664                 struct path * pp;
665
666                 condlog(2, "%s: update path write_protect to '%d' (uevent)",
667                         uev->kernel, ro);
668                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
669                 if (!pp) {
670                         condlog(0, "%s: spurious uevent, path not found",
671                                 uev->kernel);
672                         return 1;
673                 }
674                 if (pp->mpp)
675                         retval = reload_map(vecs, pp->mpp);
676
677                 condlog(2, "%s: map %s reloaded (retval %d)",
678                         uev->kernel, pp->mpp->alias, retval);
679
680         }
681
682         sysfs_device_put(dev);
683
684         return retval;
685 }
686
687 static int
688 map_discovery (struct vectors * vecs)
689 {
690         struct multipath * mpp;
691         unsigned int i;
692
693         if (dm_get_maps(vecs->mpvec))
694                 return 1;
695
696         vector_foreach_slot (vecs->mpvec, mpp, i)
697                 if (setup_multipath(vecs, mpp))
698                         return 1;
699
700         return 0;
701 }
702
703 int
704 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
705 {
706         struct vectors * vecs;
707         int r;
708
709         *reply = NULL;
710         *len = 0;
711         vecs = (struct vectors *)trigger_data;
712
713         pthread_cleanup_push(cleanup_lock, &vecs->lock);
714         lock(vecs->lock);
715
716         r = parse_cmd(str, reply, len, vecs);
717
718         if (r > 0) {
719                 *reply = STRDUP("fail\n");
720                 *len = strlen(*reply) + 1;
721                 r = 1;
722         }
723         else if (!r && *len == 0) {
724                 *reply = STRDUP("ok\n");
725                 *len = strlen(*reply) + 1;
726                 r = 0;
727         }
728         /* else if (r < 0) leave *reply alone */
729
730         lock_cleanup_pop(vecs->lock);
731         return r;
732 }
733
/*
 * Tell whether a uevent should be dropped, judging by its devpath.
 *
 * Returns 1 (discard) when the devpath has no "/block/" component,
 * when no device name follows it, or when a further component follows
 * the device name (i.e. a partition); 0 (keep) otherwise.
 */
static int
uev_discard(char * devpath)
{
	char name[11], sub[11];
	char *blk;

	/*
	 * keep only block devices, discard partitions
	 */
	blk = strstr(devpath, "/block/");
	if (!blk) {
		condlog(4, "no /block/ in '%s'", devpath);
		return 1;
	}

	if (sscanf(blk, "/block/%10s", name) == 1 &&
	    sscanf(blk, "/block/%10[^/]/%10s", name, sub) != 2)
		return 0;

	condlog(4, "discard event on %s", devpath);
	return 1;
}
755
756 int
757 uev_trigger (struct uevent * uev, void * trigger_data)
758 {
759         int r = 0;
760         struct vectors * vecs;
761
762         vecs = (struct vectors *)trigger_data;
763
764         if (uev_discard(uev->devpath))
765                 return 0;
766
767         lock(vecs->lock);
768
769         /*
770          * device map event
771          * Add events are ignored here as the tables
772          * are not fully initialised then.
773          */
774         if (!strncmp(uev->kernel, "dm-", 3)) {
775                 if (!strncmp(uev->action, "change", 6)) {
776                         r = uev_add_map(uev, vecs);
777                         goto out;
778                 }
779                 if (!strncmp(uev->action, "remove", 6)) {
780                         r = uev_remove_map(uev, vecs);
781                         goto out;
782                 }
783                 goto out;
784         }
785
786         /*
787          * path add/remove event
788          */
789         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
790                            uev->kernel) > 0)
791                 goto out;
792
793         if (!strncmp(uev->action, "add", 3)) {
794                 r = uev_add_path(uev, vecs);
795                 goto out;
796         }
797         if (!strncmp(uev->action, "remove", 6)) {
798                 r = uev_remove_path(uev, vecs);
799                 goto out;
800         }
801         if (!strncmp(uev->action, "change", 6)) {
802                 r = uev_update_path(uev, vecs);
803                 goto out;
804         }
805
806 out:
807         unlock(vecs->lock);
808         return r;
809 }
810
/*
 * Thread body of the uevent listener: calls block_signal() for SIGUSR1
 * and SIGHUP, then runs uevent_listen().  A non-zero result from the
 * listener is logged.  Always returns NULL; `ap` is unused.
 */
static void *
ueventloop (void * ap)
{
	block_signal(SIGUSR1, NULL);
	block_signal(SIGHUP, NULL);

	if (uevent_listen() != 0) {
		condlog(0, "error starting uevent listener");
	}

	return NULL;
}
822
823 static void *
824 uevqloop (void * ap)
825 {
826         block_signal(SIGUSR1, NULL);
827         block_signal(SIGHUP, NULL);
828
829         if (uevent_dispatch(&uev_trigger, ap))
830                 condlog(0, "error starting uevent dispatcher");
831
832         return NULL;
833 }
834 static void *
835 uxlsnrloop (void * ap)
836 {
837         block_signal(SIGUSR1, NULL);
838         block_signal(SIGHUP, NULL);
839
840         if (cli_init())
841                 return NULL;
842
843         set_handler_callback(LIST+PATHS, cli_list_paths);
844         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
845         set_handler_callback(LIST+MAPS, cli_list_maps);
846         set_handler_callback(LIST+STATUS, cli_list_status);
847         set_handler_callback(LIST+DAEMON, cli_list_daemon);
848         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
849         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
850         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
851         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
852         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
853         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
854         set_handler_callback(LIST+CONFIG, cli_list_config);
855         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
856         set_handler_callback(LIST+DEVICES, cli_list_devices);
857         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
858         set_handler_callback(ADD+PATH, cli_add_path);
859         set_handler_callback(DEL+PATH, cli_del_path);
860         set_handler_callback(ADD+MAP, cli_add_map);
861         set_handler_callback(DEL+MAP, cli_del_map);
862         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
863         set_handler_callback(RECONFIGURE, cli_reconfigure);
864         set_handler_callback(SUSPEND+MAP, cli_suspend);
865         set_handler_callback(RESUME+MAP, cli_resume);
866         set_handler_callback(RESIZE+MAP, cli_resize);
867         set_handler_callback(RELOAD+MAP, cli_reload);
868         set_handler_callback(RESET+MAP, cli_reassign);
869         set_handler_callback(REINSTATE+PATH, cli_reinstate);
870         set_handler_callback(FAIL+PATH, cli_fail);
871         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
872         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
873         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
874         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
875         set_handler_callback(QUIT, cli_quit);
876         set_handler_callback(SHUTDOWN, cli_shutdown);
877
878         umask(077);
879         uxsock_listen(&uxsock_trigger, ap);
880
881         return NULL;
882 }
883
884 int
885 exit_daemon (int status)
886 {
887         if (status != 0)
888                 fprintf(stderr, "bad exit status. see daemon.log\n");
889
890         condlog(3, "unlink pidfile");
891         unlink(DEFAULT_PIDFILE);
892
893         pthread_mutex_lock(&exit_mutex);
894         pthread_cond_signal(&exit_cond);
895         pthread_mutex_unlock(&exit_mutex);
896
897         return status;
898 }
899
900 const char *
901 daemon_status(void)
902 {
903         switch (running_state) {
904         case DAEMON_INIT:
905                 return "init";
906         case DAEMON_START:
907                 return "startup";
908         case DAEMON_CONFIGURE:
909                 return "configure";
910         case DAEMON_RUNNING:
911                 return "running";
912         case DAEMON_SHUTDOWN:
913                 return "shutdown";
914         }
915         return NULL;
916 }
917
918 static void
919 fail_path (struct path * pp, int del_active)
920 {
921         if (!pp->mpp)
922                 return;
923
924         condlog(2, "checker failed path %s in map %s",
925                  pp->dev_t, pp->mpp->alias);
926
927         dm_fail_path(pp->mpp->alias, pp->dev_t);
928         if (del_active)
929                 update_queue_mode_del_path(pp->mpp);
930 }
931
932 /*
933  * caller must have locked the path list before calling that function
934  */
935 static void
936 reinstate_path (struct path * pp, int add_active)
937 {
938         if (!pp->mpp)
939                 return;
940
941         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
942                 condlog(0, "%s: reinstate failed", pp->dev_t);
943         else {
944                 condlog(2, "%s: reinstated", pp->dev_t);
945                 if (add_active)
946                         update_queue_mode_add_path(pp->mpp);
947         }
948 }
949
950 static void
951 enable_group(struct path * pp)
952 {
953         struct pathgroup * pgp;
954
955         /*
956          * if path is added through uev_add_path, pgindex can be unset.
957          * next update_strings() will set it, upon map reload event.
958          *
959          * we can safely return here, because upon map reload, all
960          * PG will be enabled.
961          */
962         if (!pp->mpp->pg || !pp->pgindex)
963                 return;
964
965         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
966
967         if (pgp->status == PGSTATE_DISABLED) {
968                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
969                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
970         }
971 }
972
973 static void
974 mpvec_garbage_collector (struct vectors * vecs)
975 {
976         struct multipath * mpp;
977         unsigned int i;
978
979         if (!vecs->mpvec)
980                 return;
981
982         vector_foreach_slot (vecs->mpvec, mpp, i) {
983                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
984                         condlog(2, "%s: remove dead map", mpp->alias);
985                         remove_map_and_stop_waiter(mpp, vecs, 1);
986                         i--;
987                 }
988         }
989 }
990
991 static void
992 defered_failback_tick (vector mpvec)
993 {
994         struct multipath * mpp;
995         unsigned int i;
996
997         vector_foreach_slot (mpvec, mpp, i) {
998                 /*
999                  * defered failback getting sooner
1000                  */
1001                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1002                         mpp->failback_tick--;
1003
1004                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1005                                 switch_pathgroup(mpp);
1006                 }
1007         }
1008 }
1009
1010 static void
1011 retry_count_tick(vector mpvec)
1012 {
1013         struct multipath *mpp;
1014         unsigned int i;
1015
1016         vector_foreach_slot (mpvec, mpp, i) {
1017                 if (mpp->retry_tick) {
1018                         mpp->stat_total_queueing_time++;
1019                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1020                         if(--mpp->retry_tick == 0) {
1021                                 dm_queue_if_no_path(mpp->alias, 0);
1022                                 condlog(2, "%s: Disable queueing", mpp->alias);
1023                         }
1024                 }
1025         }
1026 }
1027
1028 int update_prio(struct path *pp, int refresh_all)
1029 {
1030         int oldpriority;
1031         struct path *pp1;
1032         struct pathgroup * pgp;
1033         int i, j, changed = 0;
1034
1035         if (refresh_all) {
1036                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1037                         vector_foreach_slot (pgp->paths, pp1, j) {
1038                                 oldpriority = pp1->priority;
1039                                 pathinfo(pp1, conf->hwtable, DI_PRIO);
1040                                 if (pp1->priority != oldpriority)
1041                                         changed = 1;
1042                         }
1043                 }
1044                 return changed;
1045         }
1046         oldpriority = pp->priority;
1047         pathinfo(pp, conf->hwtable, DI_PRIO);
1048
1049         if (pp->priority == oldpriority)
1050                 return 0;
1051         return 1;
1052 }
1053
1054 int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
1055 {
1056         int i;
1057         struct path * pp;
1058         char params[PARAMS_SIZE];
1059
1060         update_mpp_paths(mpp, vecs->pathvec);
1061         if (refresh) {
1062                 vector_foreach_slot (mpp->paths, pp, i)
1063                         pathinfo(pp, conf->hwtable, DI_PRIO);
1064         }
1065         params[0] = '\0';
1066         if (setup_map(mpp, params, PARAMS_SIZE))
1067                 return 1;
1068
1069         mpp->action = ACT_RELOAD;
1070         if (domap(mpp, params) <= 0) {
1071                 condlog(0, "%s: failed to update map : %s", mpp->alias,
1072                         strerror(errno));
1073                 return 1;
1074         }
1075         dm_lib_release();
1076         if (setup_multipath(vecs, mpp) != 0)
1077                 return 1;
1078         sync_map_state(mpp);
1079
1080         return 0;
1081 }
1082
1083 void
1084 check_path (struct vectors * vecs, struct path * pp)
1085 {
1086         int newstate;
1087         int new_path_up = 0;
1088
1089         if (!pp->mpp)
1090                 return;
1091
1092         if (pp->tick && --pp->tick)
1093                 return; /* don't check this path yet */
1094
1095         /*
1096          * provision a next check soonest,
1097          * in case we exit abnormaly from here
1098          */
1099         pp->tick = conf->checkint;
1100
1101         newstate = path_offline(pp);
1102         if (newstate == PATH_UP)
1103                 newstate = get_state(pp, 1);
1104
1105         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1106                 condlog(2, "%s: unusable path", pp->dev);
1107                 pathinfo(pp, conf->hwtable, 0);
1108                 return;
1109         }
1110         /*
1111          * Async IO in flight. Keep the previous path state
1112          * and reschedule as soon as possible
1113          */
1114         if (newstate == PATH_PENDING) {
1115                 pp->tick = 1;
1116                 return;
1117         }
1118         /*
1119          * Synchronize with kernel state
1120          */
1121         if (update_multipath_strings(pp->mpp, vecs->pathvec)) {
1122                 condlog(1, "%s: Could not synchronize with kernel state\n",
1123                         pp->dev);
1124                 pp->dmstate = PSTATE_UNDEF;
1125         }
1126         if (newstate != pp->state) {
1127                 int oldstate = pp->state;
1128                 pp->state = newstate;
1129                 LOG_MSG(1, checker_message(&pp->checker));
1130
1131                 /*
1132                  * upon state change, reset the checkint
1133                  * to the shortest delay
1134                  */
1135                 pp->checkint = conf->checkint;
1136
1137                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY) {
1138                         /*
1139                          * proactively fail path in the DM
1140                          */
1141                         if (oldstate == PATH_UP ||
1142                             oldstate == PATH_GHOST)
1143                                 fail_path(pp, 1);
1144                         else
1145                                 fail_path(pp, 0);
1146
1147                         /*
1148                          * cancel scheduled failback
1149                          */
1150                         pp->mpp->failback_tick = 0;
1151
1152                         pp->mpp->stat_path_failures++;
1153                         return;
1154                 }
1155
1156                 /*
1157                  * reinstate this path
1158                  */
1159                 if (oldstate != PATH_UP &&
1160                     oldstate != PATH_GHOST)
1161                         reinstate_path(pp, 1);
1162                 else
1163                         reinstate_path(pp, 0);
1164
1165                 new_path_up = 1;
1166
1167                 /*
1168                  * if at least one path is up in a group, and
1169                  * the group is disabled, re-enable it
1170                  */
1171                 if (newstate == PATH_UP)
1172                         enable_group(pp);
1173         }
1174         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1175                 if (pp->dmstate == PSTATE_FAILED ||
1176                     pp->dmstate == PSTATE_UNDEF) {
1177                         /* Clear IO errors */
1178                         reinstate_path(pp, 0);
1179                 } else {
1180                         LOG_MSG(4, checker_message(&pp->checker));
1181                         /*
1182                          * double the next check delay.
1183                          * max at conf->max_checkint
1184                          */
1185                         if (pp->checkint < (conf->max_checkint / 2))
1186                                 pp->checkint = 2 * pp->checkint;
1187                         else
1188                                 pp->checkint = conf->max_checkint;
1189
1190                         pp->tick = pp->checkint;
1191                         condlog(4, "%s: delay next check %is",
1192                                 pp->dev_t, pp->tick);
1193                 }
1194         }
1195         else if (newstate == PATH_DOWN)
1196                 LOG_MSG(2, checker_message(&pp->checker));
1197
1198         pp->state = newstate;
1199
1200         /*
1201          * path prio refreshing
1202          */
1203         condlog(4, "path prio refresh");
1204
1205         if (update_prio(pp, new_path_up) &&
1206             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1207              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1208                 update_path_groups(pp->mpp, vecs, !new_path_up);
1209         else if (need_switch_pathgroup(pp->mpp, 0)) {
1210                 if (pp->mpp->pgfailback > 0 &&
1211                     (new_path_up || pp->mpp->failback_tick <= 0))
1212                         pp->mpp->failback_tick =
1213                                 pp->mpp->pgfailback + 1;
1214                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1215                         switch_pathgroup(pp->mpp);
1216         }
1217 }
1218
1219 static void *
1220 checkerloop (void *ap)
1221 {
1222         struct vectors *vecs;
1223         struct path *pp;
1224         int count = 0;
1225         unsigned int i;
1226         sigset_t old;
1227
1228         mlockall(MCL_CURRENT | MCL_FUTURE);
1229         vecs = (struct vectors *)ap;
1230         condlog(2, "path checkers start up");
1231
1232         /*
1233          * init the path check interval
1234          */
1235         vector_foreach_slot (vecs->pathvec, pp, i) {
1236                 pp->checkint = conf->checkint;
1237         }
1238
1239         while (1) {
1240                 block_signal(SIGHUP, &old);
1241                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1242                 lock(vecs->lock);
1243                 condlog(4, "tick");
1244
1245                 if (vecs->pathvec) {
1246                         vector_foreach_slot (vecs->pathvec, pp, i) {
1247                                 check_path(vecs, pp);
1248                         }
1249                 }
1250                 if (vecs->mpvec) {
1251                         defered_failback_tick(vecs->mpvec);
1252                         retry_count_tick(vecs->mpvec);
1253                 }
1254                 if (count)
1255                         count--;
1256                 else {
1257                         condlog(4, "map garbage collection");
1258                         mpvec_garbage_collector(vecs);
1259                         count = MAPGCINT;
1260                 }
1261
1262                 lock_cleanup_pop(vecs->lock);
1263                 pthread_sigmask(SIG_SETMASK, &old, NULL);
1264                 sleep(1);
1265         }
1266         return NULL;
1267 }
1268
1269 int
1270 configure (struct vectors * vecs, int start_waiters)
1271 {
1272         struct multipath * mpp;
1273         struct path * pp;
1274         vector mpvec;
1275         int i;
1276
1277         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1278                 return 1;
1279
1280         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1281                 return 1;
1282
1283         if (!(mpvec = vector_alloc()))
1284                 return 1;
1285
1286         /*
1287          * probe for current path (from sysfs) and map (from dm) sets
1288          */
1289         path_discovery(vecs->pathvec, conf, DI_ALL);
1290
1291         vector_foreach_slot (vecs->pathvec, pp, i){
1292                 if (filter_path(conf, pp) > 0){
1293                         vector_del_slot(vecs->pathvec, i);
1294                         free_path(pp);
1295                         i--;
1296                 }
1297                 else
1298                         pp->checkint = conf->checkint;
1299         }
1300         if (map_discovery(vecs))
1301                 return 1;
1302
1303         /*
1304          * create new set of maps & push changed ones into dm
1305          */
1306         if (coalesce_paths(vecs, mpvec, NULL, 1))
1307                 return 1;
1308
1309         /*
1310          * may need to remove some maps which are no longer relevant
1311          * e.g., due to blacklist changes in conf file
1312          */
1313         if (coalesce_maps(vecs, mpvec))
1314                 return 1;
1315
1316         dm_lib_release();
1317
1318         sync_maps_state(mpvec);
1319
1320         /*
1321          * purge dm of old maps
1322          */
1323         remove_maps(vecs);
1324
1325         /*
1326          * save new set of maps formed by considering current path state
1327          */
1328         vector_free(vecs->mpvec);
1329         vecs->mpvec = mpvec;
1330
1331         /*
1332          * start dm event waiter threads for these new maps
1333          */
1334         vector_foreach_slot(vecs->mpvec, mpp, i) {
1335                 if (setup_multipath(vecs, mpp))
1336                         return 1;
1337                 if (start_waiters)
1338                         if (start_waiter_thread(mpp, vecs))
1339                                 return 1;
1340         }
1341         return 0;
1342 }
1343
1344 int
1345 reconfigure (struct vectors * vecs)
1346 {
1347         struct config * old = conf;
1348         int retval = 1;
1349
1350         /*
1351          * free old map and path vectors ... they use old conf state
1352          */
1353         if (VECTOR_SIZE(vecs->mpvec))
1354                 remove_maps_and_stop_waiters(vecs);
1355
1356         if (VECTOR_SIZE(vecs->pathvec))
1357                 free_pathvec(vecs->pathvec, FREE_PATHS);
1358
1359         vecs->pathvec = NULL;
1360         conf = NULL;
1361
1362         if (!load_config(DEFAULT_CONFIGFILE)) {
1363                 conf->verbosity = old->verbosity;
1364                 conf->daemon = 1;
1365                 configure(vecs, 1);
1366                 free_config(old);
1367                 retval = 0;
1368         }
1369
1370         return retval;
1371 }
1372
1373 static struct vectors *
1374 init_vecs (void)
1375 {
1376         struct vectors * vecs;
1377
1378         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1379
1380         if (!vecs)
1381                 return NULL;
1382
1383         vecs->lock.mutex =
1384                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1385
1386         if (!vecs->lock.mutex)
1387                 goto out;
1388
1389         pthread_mutex_init(vecs->lock.mutex, NULL);
1390         vecs->lock.depth = 0;
1391
1392         return vecs;
1393
1394 out:
1395         FREE(vecs);
1396         condlog(0, "failed to init paths");
1397         return NULL;
1398 }
1399
1400 static void *
1401 signal_set(int signo, void (*func) (int))
1402 {
1403         int r;
1404         struct sigaction sig;
1405         struct sigaction osig;
1406
1407         sig.sa_handler = func;
1408         sigemptyset(&sig.sa_mask);
1409         sig.sa_flags = 0;
1410
1411         r = sigaction(signo, &sig, &osig);
1412
1413         if (r < 0)
1414                 return (SIG_ERR);
1415         else
1416                 return (osig.sa_handler);
1417 }
1418
1419 static void
1420 sighup (int sig)
1421 {
1422         condlog(2, "reconfigure (SIGHUP)");
1423
1424         if (running_state != DAEMON_RUNNING)
1425                 return;
1426
1427         lock(gvecs->lock);
1428         reconfigure(gvecs);
1429         unlock(gvecs->lock);
1430
1431 #ifdef _DEBUG_
1432         dbg_free_final(NULL);
1433 #endif
1434 }
1435
/*
 * Termination signal handler: request a clean daemon exit.
 */
static void
sigend (int sig)
{
	(void)sig;	/* the signal number itself is not needed */

	exit_daemon(0);
}
1441
/*
 * SIGUSR1 handler: only logs that the signal arrived.
 */
static void
sigusr1 (int sig)
{
	(void)sig;	/* the signal number itself is not needed */

	condlog(3, "SIGUSR1 received");
}
1447
1448 static void
1449 signal_init(void)
1450 {
1451         signal_set(SIGHUP, sighup);
1452         signal_set(SIGUSR1, sigusr1);
1453         signal_set(SIGINT, sigend);
1454         signal_set(SIGTERM, sigend);
1455         signal(SIGPIPE, SIG_IGN);
1456 }
1457
/*
 * Switch the calling process to the SCHED_RR policy at priority 99.
 * A failure is logged and otherwise ignored.
 */
static void
setscheduler (void)
{
	static struct sched_param sched_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler (0, SCHED_RR, &sched_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1472
/*
 * Write the minimum OOM adjustment value for this process.
 *
 * /proc/self/oom_score_adj is tried first; if it does not exist
 * (ENOENT), /proc/self/oom_adj is tried with its own minimum value.
 * Any other stat() error, or an fopen() failure, is logged and ends
 * the attempt.
 */
static void
set_oom_adj (void)
{
	/* candidate files with the value to write, in order of preference */
	static const struct {
		char *file;
		int score;
	} knobs[] = {
		{ "/proc/self/oom_score_adj", OOM_SCORE_ADJ_MIN },
		{ "/proc/self/oom_adj", OOM_ADJUST_MIN },
	};
	unsigned int k;
	FILE *fp;
	struct stat st;

	for (k = 0; k < sizeof(knobs) / sizeof(knobs[0]); k++) {
		char *file = knobs[k].file;

		if (stat(file, &st) != 0) {
			if (errno == ENOENT)
				continue;	/* try the next candidate */
			condlog(0, "couldn't stat %s : %s", file,
				strerror(errno));
			return;
		}

		fp = fopen(file, "w");
		if (!fp) {
			condlog(0, "couldn't fopen %s : %s", file,
				strerror(errno));
			return;
		}
		fprintf(fp, "%i", knobs[k].score);
		fclose(fp);
		return;
	}
	condlog(0, "couldn't adjust oom score");
}
1504
1505 static int
1506 child (void * param)
1507 {
1508         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
1509         pthread_attr_t log_attr, misc_attr;
1510         struct vectors * vecs;
1511         struct multipath * mpp;
1512         int i;
1513         int rc;
1514
1515         mlockall(MCL_CURRENT | MCL_FUTURE);
1516
1517         setup_thread_attr(&misc_attr, 64 * 1024, 1);
1518         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
1519
1520         if (logsink) {
1521                 setup_thread_attr(&log_attr, 64 * 1024, 0);
1522                 log_thread_start(&log_attr);
1523                 pthread_attr_destroy(&log_attr);
1524         }
1525
1526         running_state = DAEMON_START;
1527
1528         condlog(2, "--------start up--------");
1529         condlog(2, "read " DEFAULT_CONFIGFILE);
1530
1531         if (load_config(DEFAULT_CONFIGFILE))
1532                 exit(1);
1533
1534         if (init_checkers()) {
1535                 condlog(0, "failed to initialize checkers");
1536                 exit(1);
1537         }
1538         if (init_prio()) {
1539                 condlog(0, "failed to initialize prioritizers");
1540                 exit(1);
1541         }
1542
1543         setlogmask(LOG_UPTO(conf->verbosity + 3));
1544
1545         /*
1546          * fill the voids left in the config file
1547          */
1548         if (!conf->checkint) {
1549                 conf->checkint = DEFAULT_CHECKINT;
1550                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1551         }
1552
1553         if (conf->max_fds) {
1554                 struct rlimit fd_limit;
1555
1556                 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
1557                         condlog(0, "can't get open fds limit: %s\n",
1558                                 strerror(errno));
1559                         fd_limit.rlim_cur = 0;
1560                         fd_limit.rlim_max = 0;
1561                 }
1562                 if (fd_limit.rlim_cur < conf->max_fds) {
1563                         fd_limit.rlim_cur = conf->max_fds;
1564                         if (fd_limit.rlim_max < conf->max_fds)
1565                                 fd_limit.rlim_max = conf->max_fds;
1566                         if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
1567                                 condlog(0, "can't set open fds limit to "
1568                                         "%lu/%lu : %s\n",
1569                                         fd_limit.rlim_cur, fd_limit.rlim_max,
1570                                         strerror(errno));
1571                         } else {
1572                                 condlog(3, "set open fds limit to %lu/%lu\n",
1573                                         fd_limit.rlim_cur, fd_limit.rlim_max);
1574                         }
1575                 }
1576
1577         }
1578
1579         signal_init();
1580         setscheduler();
1581         set_oom_adj();
1582         vecs = gvecs = init_vecs();
1583
1584         if (!vecs)
1585                 exit(1);
1586
1587         if (sysfs_init(conf->sysfs_dir, FILE_NAME_SIZE)) {
1588                 condlog(0, "can not find sysfs mount point");
1589                 exit(1);
1590         }
1591         conf->daemon = 1;
1592         udev_set_sync_support(0);
1593         /*
1594          * Start uevent listener early to catch events
1595          */
1596         if ((rc = pthread_create(&uevent_thr, &misc_attr, ueventloop, vecs))) {
1597                 condlog(0, "failed to create uevent thread: %d", rc);
1598                 exit(1);
1599         }
1600         if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
1601                 condlog(0, "failed to create cli listener: %d", rc);
1602                 exit(1);
1603         }
1604         /*
1605          * fetch and configure both paths and multipaths
1606          */
1607         lock(vecs->lock);
1608         running_state = DAEMON_CONFIGURE;
1609
1610         if (configure(vecs, 1)) {
1611                 unlock(vecs->lock);
1612                 condlog(0, "failure during configuration");
1613                 exit(1);
1614         }
1615         unlock(vecs->lock);
1616
1617         /*
1618          * start threads
1619          */
1620         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
1621                 condlog(0,"failed to create checker loop thread: %d", rc);
1622                 exit(1);
1623         }
1624         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
1625                 condlog(0, "failed to create uevent dispatcher: %d", rc);
1626                 exit(1);
1627         }
1628         pthread_attr_destroy(&misc_attr);
1629
1630         pthread_mutex_lock(&exit_mutex);
1631         /* Startup complete, create logfile */
1632         if (pidfile_create(DEFAULT_PIDFILE, daemon_pid))
1633                 /* Ignore errors, we can live without */
1634                 condlog(1, "failed to create pidfile");
1635
1636         running_state = DAEMON_RUNNING;
1637         pthread_cond_wait(&exit_cond, &exit_mutex);
1638
1639         /*
1640          * exit path
1641          */
1642         running_state = DAEMON_SHUTDOWN;
1643         block_signal(SIGHUP, NULL);
1644         lock(vecs->lock);
1645         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
1646                 vector_foreach_slot(vecs->mpvec, mpp, i)
1647                         dm_queue_if_no_path(mpp->alias, 0);
1648         remove_maps_and_stop_waiters(vecs);
1649         unlock(vecs->lock);
1650
1651         pthread_cancel(check_thr);
1652         pthread_cancel(uevent_thr);
1653         pthread_cancel(uxlsnr_thr);
1654         pthread_cancel(uevq_thr);
1655
1656         sysfs_cleanup();
1657
1658         lock(vecs->lock);
1659         free_pathvec(vecs->pathvec, FREE_PATHS);
1660         vecs->pathvec = NULL;
1661         unlock(vecs->lock);
1662         /* Now all the waitevent threads will start rushing in. */
1663         while (vecs->lock.depth > 0) {
1664                 sleep (1); /* This is weak. */
1665                 condlog(3,"Have %d wait event checkers threads to de-alloc, waiting..\n", vecs->lock.depth);
1666         }
1667         pthread_mutex_destroy(vecs->lock.mutex);
1668         FREE(vecs->lock.mutex);
1669         vecs->lock.depth = 0;
1670         vecs->lock.mutex = NULL;
1671         FREE(vecs);
1672         vecs = NULL;
1673
1674         cleanup_checkers();
1675         cleanup_prio();
1676
1677         dm_lib_release();
1678         dm_lib_exit();
1679
1680         /* We're done here */
1681         condlog(3, "unlink pidfile");
1682         unlink(DEFAULT_PIDFILE);
1683
1684         condlog(2, "--------shut down-------");
1685
1686         if (logsink)
1687                 log_thread_stop();
1688
1689         /*
1690          * Freeing config must be done after condlog() and dm_lib_exit(),
1691          * because logging functions like dlog() and dm_write_log()
1692          * reference the config.
1693          */
1694         free_config(conf);
1695         conf = NULL;
1696
1697 #ifdef _DEBUG_
1698         dbg_free_final(NULL);
1699 #endif
1700
1701         exit(0);
1702 }
1703
1704 static int
1705 daemonize(void)
1706 {
1707         int pid;
1708         int dev_null_fd;
1709
1710         if( (pid = fork()) < 0){
1711                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
1712                 return -1;
1713         }
1714         else if (pid != 0)
1715                 return pid;
1716
1717         setsid();
1718
1719         if ( (pid = fork()) < 0)
1720                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
1721         else if (pid != 0)
1722                 _exit(0);
1723
1724         if (chdir("/") < 0)
1725                 fprintf(stderr, "cannot chdir to '/', continuing\n");
1726
1727         dev_null_fd = open("/dev/null", O_RDWR);
1728         if (dev_null_fd < 0){
1729                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
1730                         strerror(errno));
1731                 _exit(0);
1732         }
1733
1734         close(STDIN_FILENO);
1735         dup(dev_null_fd);
1736         close(STDOUT_FILENO);
1737         dup(dev_null_fd);
1738         close(STDERR_FILENO);
1739         dup(dev_null_fd);
1740         close(dev_null_fd);
1741         daemon_pid = getpid();
1742         return 0;
1743 }
1744
1745 int
1746 main (int argc, char *argv[])
1747 {
1748         extern char *optarg;
1749         extern int optind;
1750         int arg;
1751         int err;
1752
1753         logsink = 1;
1754         running_state = DAEMON_INIT;
1755         dm_init();
1756
1757         if (getuid() != 0) {
1758                 fprintf(stderr, "need to be root\n");
1759                 exit(1);
1760         }
1761
1762         /* make sure we don't lock any path */
1763         chdir("/");
1764         umask(umask(077) | 022);
1765
1766         conf = alloc_config();
1767
1768         if (!conf)
1769                 exit(1);
1770
1771         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1772         switch(arg) {
1773                 case 'd':
1774                         logsink = 0;
1775                         //debug=1; /* ### comment me out ### */
1776                         break;
1777                 case 'v':
1778                         if (sizeof(optarg) > sizeof(char *) ||
1779                             !isdigit(optarg[0]))
1780                                 exit(1);
1781
1782                         conf->verbosity = atoi(optarg);
1783                         break;
1784                 case 'k':
1785                         uxclnt(optarg);
1786                         exit(0);
1787                 default:
1788                         ;
1789                 }
1790         }
1791         if (optind < argc) {
1792                 char cmd[CMDSIZE];
1793                 char * s = cmd;
1794                 char * c = s;
1795
1796                 while (optind < argc) {
1797                         if (strchr(argv[optind], ' '))
1798                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
1799                         else
1800                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
1801                         optind++;
1802                 }
1803                 c += snprintf(c, s + CMDSIZE - c, "\n");
1804                 uxclnt(s);
1805                 exit(0);
1806         }
1807
1808         if (!logsink)
1809                 err = 0;
1810         else
1811                 err = daemonize();
1812
1813         if (err < 0)
1814                 /* error */
1815                 exit(1);
1816         else if (err > 0)
1817                 /* parent dies */
1818                 exit(0);
1819         else
1820                 /* child lives */
1821                 return (child(NULL));
1822 }
1823