[multipathd] DM configuration ground work #2
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15
16 /*
17  * libsysfs
18  */
19 #include <sysfs/libsysfs.h>
20 #include <sysfs/dlist.h>
21
22 /*
23  * libcheckers
24  */
25 #include <checkers.h>
26 #include <path_state.h>
27
28 /*
29  * libmultipath
30  */
31 #include <parser.h>
32 #include <vector.h>
33 #include <memory.h>
34 #include <config.h>
35 #include <callout.h>
36 #include <util.h>
37 #include <blacklist.h>
38 #include <hwtable.h>
39 #include <defaults.h>
40 #include <structs.h>
41 #include <structs_vec.h>
42 #include <dmparser.h>
43 #include <devmapper.h>
44 #include <dict.h>
45 #include <discovery.h>
46 #include <debug.h>
47 #include <propsel.h>
48 #include <uevent.h>
49 #include <switchgroup.h>
50 #include <path_state.h>
51 #include <print.h>
52 #include <configure.h>
53
54 #include "main.h"
55 #include "pidfile.h"
56 #include "uxlsnr.h"
57 #include "uxclnt.h"
58 #include "cli.h"
59 #include "cli_handlers.h"
60
61 #define FILE_NAME_SIZE 256
62 #define CMDSIZE 160
63
/*
 * Log a pending checker message for the path "pp" that is in scope at the
 * call site, then wipe the message buffer so the same text is not logged
 * twice.  Nothing happens when the buffer holds an empty string.
 *
 *   a : condlog verbosity level
 *   b : writable message buffer, MAX_CHECKER_MSG_SIZE bytes long
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and stays safe inside an unbraced if/else.
 */
#define LOG_MSG(a,b) \
	do { \
		if (strlen(b)) { \
			condlog(a, "%s: %s", pp->dev_t, b); \
			memset(b, 0, MAX_CHECKER_MSG_SIZE); \
		} \
	} while (0)
69
/*
 * Mutex helpers.
 *
 * With LCKDBG defined, every lock/unlock is traced on stderr together
 * with the file, function and line of the call site.  The traced lock()
 * and unlock() are wrapped in do { } while (0) so that both the trace and
 * the mutex operation stay under the same condition when the macro is
 * used as the body of an unbraced if/else.
 *
 * lock_cleanup_pop() cannot be wrapped the same way: pthread_cleanup_pop()
 * has to pair lexically with the pthread_cleanup_push() of the caller.
 * It keeps its trailing semicolon for compatibility with existing callers.
 */
#ifdef LCKDBG
#define lock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
		pthread_mutex_lock(a); \
	} while (0)
#define unlock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
		pthread_mutex_unlock(a); \
	} while (0)
#define lock_cleanup_pop(a) \
	fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
	pthread_cleanup_pop(1);
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#define lock_cleanup_pop(a) pthread_cleanup_pop(1);
#endif
85
/*
 * Mutex / condition pair used to announce daemon termination:
 * exit_daemon() takes exit_mutex and signals exit_cond.
 */
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
88
89 /*
90  * structs
91  */
92 struct vectors * gvecs; /* global copy of vecs for use in sig handlers */
93
94 struct event_thread {
95         struct dm_task *dmt;
96         pthread_t thread;
97         int event_nr;
98         char mapname[WWID_SIZE];
99         struct vectors *vecs;
100 };
101
102 static struct event_thread *
103 alloc_waiter (void)
104 {
105
106         struct event_thread * wp;
107
108         wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
109
110         return wp;
111 }
112
113 static void
114 free_waiter (void * data)
115 {
116         struct event_thread * wp = (struct event_thread *)data;
117
118         if (wp->dmt)
119                 dm_task_destroy(wp->dmt);
120         FREE(wp);
121 }
122
123 static void
124 stop_waiter_thread (struct multipath * mpp, struct vectors * vecs)
125 {
126         struct event_thread * wp = (struct event_thread *)mpp->waiter;
127         
128         if (!wp) {
129                 condlog(3, "%s: no waiter thread", mpp->alias);
130                 return;
131         }
132         condlog(2, "%s: stop event checker thread", wp->mapname);
133         pthread_kill((pthread_t)wp->thread, SIGUSR1);
134 }
135
/*
 * pthread cleanup handler: unlock the mutex passed as opaque argument.
 */
static void
cleanup_lock (void * data)
{
	pthread_mutex_t * mutex = data;

	pthread_mutex_unlock(mutex);
}
141
142 /*
143  * mpp->no_path_retry:
144  *   -2 (QUEUE) : queue_if_no_path enabled, never turned off
145  *   -1 (FAIL)  : fail_if_no_path
146  *    0 (UNDEF) : nothing
147  *   >0         : queue_if_no_path enabled, turned off after polling n times
148  */
149 static void
150 update_queue_mode_del_path(struct multipath *mpp)
151 {
152         if (--mpp->nr_active == 0 && mpp->no_path_retry > 0) {
153                 /*
154                  * Enter retry mode.
155                  * meaning of +1: retry_tick may be decremented in
156                  *                checkerloop before starting retry.
157                  */
158                 mpp->retry_tick = mpp->no_path_retry * conf->checkint + 1;
159                 condlog(1, "%s: Entering recovery mode: max_retries=%d",
160                         mpp->alias, mpp->no_path_retry);
161         }
162         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
163 }
164
165 static void
166 update_queue_mode_add_path(struct multipath *mpp)
167 {
168         if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
169                 /* come back to normal mode from retry mode */
170                 mpp->retry_tick = 0;
171                 dm_queue_if_no_path(mpp->alias, 1);
172                 condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
173                 condlog(1, "%s: Recovered to normal mode", mpp->alias);
174         }
175         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
176 }
177
178 static int
179 need_switch_pathgroup (struct multipath * mpp, int refresh)
180 {
181         struct pathgroup * pgp;
182         struct path * pp;
183         int i, j;
184
185         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
186                 return 0;
187
188         /*
189          * Refresh path priority values
190          */
191         if (refresh)
192                 vector_foreach_slot (mpp->pg, pgp, i)
193                         vector_foreach_slot (pgp->paths, pp, j)
194                                 pathinfo(pp, conf->hwtable, DI_PRIO);
195
196         mpp->bestpg = select_path_group(mpp);
197
198         if (mpp->bestpg != mpp->nextpg)
199                 return 1;
200
201         return 0;
202 }
203
204 static void
205 switch_pathgroup (struct multipath * mpp)
206 {
207         dm_switchgroup(mpp->alias, mpp->bestpg);
208         condlog(2, "%s: switch to path group #%i",
209                  mpp->alias, mpp->bestpg);
210 }
211
212 static int
213 coalesce_maps(struct vectors *vecs, vector nmpv)
214 {
215         struct multipath * ompp;
216         vector ompv = vecs->mpvec;
217         int i, j;
218
219         condlog(3, "coalesce_maps vs = %u", VECTOR_SIZE(ompv));
220         condlog(3, "coalesce_maps vs = %u", VECTOR_SIZE(nmpv));
221
222         vector_foreach_slot (ompv, ompp, i) {
223                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
224                         /*
225                          * remove all current maps not allowed by the
226                          * current configuration
227                          */
228                         if (dm_flush_map(ompp->alias, DEFAULT_TARGET)) {
229                                 condlog(0, "%s: unable to flush devmap",
230                                         ompp->alias);
231                                 /*
232                                  * may be just because the device is open
233                                  */
234                                 if (!vector_alloc_slot(nmpv))
235                                         return 1;
236
237                                 vector_set_slot(nmpv, ompp);
238                                 setup_multipath(vecs, ompp);
239
240                                 if ((j = find_slot(ompv, (void *)ompp)) != -1)
241                                         vector_del_slot(ompv, j);
242
243                                 continue;
244                         }
245                         else
246                                 condlog(3, "%s devmap removed", ompp->alias);
247                 }
248         }
249         return 0;
250 }
251
252 static int
253 update_multipath (struct vectors *vecs, char *mapname)
254 {
255         struct multipath *mpp;
256         struct pathgroup  *pgp;
257         struct path *pp;
258         int i, j;
259         int r = 1;
260
261         mpp = find_mp_by_alias(vecs->mpvec, mapname);
262
263         if (!mpp)
264                 goto out;
265
266         free_pgvec(mpp->pg, KEEP_PATHS);
267         mpp->pg = NULL;
268
269         if (setup_multipath(vecs, mpp))
270                 goto out; /* mpp freed in setup_multipath */
271
272         /*
273          * compare checkers states with DM states
274          */
275         vector_foreach_slot (mpp->pg, pgp, i) {
276                 vector_foreach_slot (pgp->paths, pp, j) {
277                         if (pp->dmstate != PSTATE_FAILED)
278                                 continue;
279
280                         if (pp->state != PATH_DOWN) {
281                                 condlog(2, "%s: mark as failed", pp->dev_t);
282                                 pp->state = PATH_DOWN;
283                                 update_queue_mode_del_path(mpp);
284
285                                 /*
286                                  * if opportune,
287                                  * schedule the next check earlier
288                                  */
289                                 if (pp->tick > conf->checkint)
290                                         pp->tick = conf->checkint;
291                         }
292                 }
293         }
294         r = 0;
295 out:
296         if (r)
297                 condlog(0, "failed to update multipath");
298
299         return r;
300 }
301
/*
 * Unblock SIGHUP and SIGUSR1 for the calling thread.
 * Returns the signal mask that was in effect before the call, so the
 * caller can restore it.
 */
static sigset_t unblock_signals(void)
{
	sigset_t wanted;
	sigset_t previous;

	sigemptyset(&wanted);
	sigaddset(&wanted, SIGHUP);
	sigaddset(&wanted, SIGUSR1);

	pthread_sigmask(SIG_UNBLOCK, &wanted, &previous);

	return previous;
}
312
313 /*
314  * returns the reschedule delay
315  * negative means *stop*
316  */
317 static int
318 waiteventloop (struct event_thread * waiter)
319 {
320         sigset_t set;
321         int event_nr;
322         int r;
323
324         if (!waiter->event_nr)
325                 waiter->event_nr = dm_geteventnr(waiter->mapname);
326
327         if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT))) {
328                 condlog(0, "%s: devmap event #%i dm_task_create error",
329                                 waiter->mapname, waiter->event_nr);
330                 return 1;
331         }
332
333         if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
334                 condlog(0, "%s: devmap event #%i dm_task_set_name error",
335                                 waiter->mapname, waiter->event_nr);
336                 dm_task_destroy(waiter->dmt);
337                 return 1;
338         }
339
340         if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
341                                                       waiter->event_nr)) {
342                 condlog(0, "%s: devmap event #%i dm_task_set_event_nr error",
343                                 waiter->mapname, waiter->event_nr);
344                 dm_task_destroy(waiter->dmt);
345                 return 1;
346         }
347
348         dm_task_no_open_count(waiter->dmt);
349         
350         /* accept wait interruption */
351         set = unblock_signals();
352
353         /* interruption spits messages */
354         dm_shut_log();
355
356         /* wait */
357         r = dm_task_run(waiter->dmt);
358
359         /* wait is over : event or interrupt */
360         pthread_sigmask(SIG_SETMASK, &set, NULL);
361         //dm_restore_log();
362
363         if (!r) /* wait interrupted by signal */
364                 return -1;
365
366         dm_task_destroy(waiter->dmt);
367         waiter->dmt = NULL;
368         waiter->event_nr++;
369
370         /*
371          * upon event ...
372          */
373         while (1) {
374                 condlog(3, "%s: devmap event #%i",
375                                 waiter->mapname, waiter->event_nr);
376
377                 /*
378                  * event might be :
379                  *
380                  * 1) a table reload, which means our mpp structure is
381                  *    obsolete : refresh it through update_multipath()
382                  * 2) a path failed by DM : mark as such through
383                  *    update_multipath()
384                  * 3) map has gone away : stop the thread.
385                  * 4) a path reinstate : nothing to do
386                  * 5) a switch group : nothing to do
387                  */
388                 pthread_cleanup_push(cleanup_lock, waiter->vecs->lock);
389                 lock(waiter->vecs->lock);
390                 r = update_multipath(waiter->vecs, waiter->mapname);
391                 lock_cleanup_pop(waiter->vecs->lock);
392
393                 if (r)
394                         return -1; /* stop the thread */
395
396                 event_nr = dm_geteventnr(waiter->mapname);
397
398                 if (waiter->event_nr == event_nr)
399                         return 1; /* upon problem reschedule 1s later */
400
401                 waiter->event_nr = event_nr;
402         }
403         return -1; /* never reach there */
404 }
405
406 static void *
407 waitevent (void * et)
408 {
409         int r;
410         struct event_thread *waiter;
411
412         mlockall(MCL_CURRENT | MCL_FUTURE);
413
414         waiter = (struct event_thread *)et;
415         pthread_cleanup_push(free_waiter, et);
416
417         while (1) {
418                 r = waiteventloop(waiter);
419
420                 if (r < 0)
421                         break;
422
423                 sleep(r);
424         }
425
426         pthread_cleanup_pop(1);
427         return NULL;
428 }
429
430 static int
431 start_waiter_thread (struct multipath * mpp, struct vectors * vecs)
432 {
433         pthread_attr_t attr;
434         struct event_thread * wp;
435
436         if (!mpp)
437                 return 0;
438
439         if (pthread_attr_init(&attr))
440                 goto out;
441
442         pthread_attr_setstacksize(&attr, 32 * 1024);
443         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
444
445         wp = alloc_waiter();
446
447         if (!wp)
448                 goto out;
449
450         mpp->waiter = (void *)wp;
451         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
452         wp->vecs = vecs;
453
454         if (pthread_create(&wp->thread, &attr, waitevent, wp)) {
455                 condlog(0, "%s: cannot create event checker", wp->mapname);
456                 goto out1;
457         }
458         condlog(2, "%s: event checker started", wp->mapname);
459
460         return 0;
461 out1:
462         free_waiter(wp);
463         mpp->waiter = NULL;
464 out:
465         condlog(0, "failed to start waiter thread");
466         return 1;
467 }
468
469 int
470 uev_add_map (char * devname, struct vectors * vecs)
471 {
472         int major, minor;
473         char dev_t[BLK_DEV_SIZE];
474         char * alias;
475         struct multipath * mpp;
476
477         if (sscanf(devname, "dm-%d", &minor) == 1 &&
478             !sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE) &&
479             sscanf(dev_t, "%d:%d", &major, &minor) == 2)
480                 alias = dm_mapname(major, minor);
481         else
482                 alias = STRDUP(devname);
483                 
484         if (!alias)
485                 return 1;
486         
487         if (!dm_type(alias, DEFAULT_TARGET)) {
488                 condlog(4, "%s: not a multipath map", alias);
489                 FREE(alias);
490                 return 0;
491         }
492
493         mpp = find_mp_by_alias(vecs->mpvec, alias);
494
495         if (mpp) {
496                 /*
497                  * Not really an error -- we generate our own uevent
498                  * if we create a multipath mapped device as a result
499                  * of uev_add_path
500                  */
501                 condlog(0, "%s: spurious uevent, devmap already registered",
502                         devname);
503                 FREE(alias);
504                 return 0;
505         }
506
507         /*
508          * now we can register the map
509          */
510         if ((mpp = add_map_without_path(vecs, minor, alias,
511                                         start_waiter_thread))) {
512                 condlog(3, "%s devmap %s added", devname, alias);
513                 return 0;
514         }
515
516         condlog(0, "%s: uev_add_map failed", alias);
517         return 1;
518 }
519
520 int
521 uev_remove_map (char * devname, struct vectors * vecs)
522 {
523         int minor;
524         struct multipath * mpp;
525
526         if (sscanf(devname, "dm-%d", &minor) == 1)
527                 mpp = find_mp_by_minor(vecs->mpvec, minor);
528         else
529                 mpp = find_mp_by_alias(vecs->mpvec, devname);
530
531         if (!mpp) {
532                 condlog(3, "%s: devmap not registered, can't remove",
533                         devname);
534                 return 0;
535         }
536
537         condlog(2, "remove %s devmap", mpp->alias);
538         remove_map(mpp, vecs, stop_waiter_thread, 1);
539
540         return 0;
541 }
542
543 int
544 uev_add_path (char * devname, struct vectors * vecs)
545 {
546         struct path * pp;
547
548         pp = find_path_by_dev(vecs->pathvec, devname);
549
550         if (pp) {
551                 condlog(3, "%s: already in pathvec", devname);
552                 return 1;
553         }
554         pp = store_pathinfo(vecs->pathvec, conf->hwtable,
555                        devname, DI_SYSFS | DI_WWID);
556
557         if (!pp) {
558                 condlog(0, "%s: failed to store path info", devname);
559                 return 1;
560         }
561
562         condlog(2, "%s: path checker registered", devname);
563         pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
564
565         if (pp->mpp) {
566                 condlog(4, "%s: ownership set to %s",
567                                 pp->dev_t, pp->mpp->alias);
568         } else {
569                 condlog(4, "%s: orphaned", pp->dev_t);
570                 orphan_path(pp);
571         }
572
573         return 0;
574 }
575
576 int
577 uev_remove_path (char * devname, struct vectors * vecs)
578 {
579         int i;
580         struct path * pp;
581
582         pp = find_path_by_dev(vecs->pathvec, devname);
583
584         if (!pp) {
585                 condlog(3, "%s: not in pathvec", devname);
586                 return 1;
587         }
588
589         if (pp->mpp && pp->state == PATH_UP)
590                 update_queue_mode_del_path(pp->mpp);
591
592         condlog(2, "remove %s path checker", devname);
593         i = find_slot(vecs->pathvec, (void *)pp);
594         vector_del_slot(vecs->pathvec, i);
595         free_path(pp);
596
597         return 0;
598 }
599
600 int
601 show_paths (char ** r, int * len, struct vectors * vecs)
602 {
603         int i;
604         struct path * pp;
605         char * c;
606         char * reply;
607         int maxlen = INITIAL_REPLY_LEN;
608         int again = 1;
609
610         get_path_layout(vecs->pathvec);
611         reply = MALLOC(maxlen);
612
613         while (again) {
614                 if (!reply)
615                         return 1;
616
617                 c = reply;
618
619                 if (VECTOR_SIZE(vecs->pathvec) > 0)
620                         c += snprint_path_header(c, reply + maxlen - c,
621                                                  PRINT_PATH_CHECKER);
622
623                 vector_foreach_slot(vecs->pathvec, pp, i)
624                         c += snprint_path(c, reply + maxlen - c,
625                                           PRINT_PATH_CHECKER, pp);
626
627                 again = ((c - reply) == (maxlen - 1));
628
629                 if (again)
630                         reply = REALLOC(reply, maxlen *= 2);
631
632         }
633         *r = reply;
634         *len = (int)(c - reply + 1);
635         return 0;
636 }
637
638 int
639 show_maps (char ** r, int *len, struct vectors * vecs)
640 {
641         int i;
642         struct multipath * mpp;
643         char * c;
644         char * reply;
645         int maxlen = INITIAL_REPLY_LEN;
646         int again = 1;
647
648         get_map_layout(vecs->mpvec);
649         reply = MALLOC(maxlen);
650
651         while (again) {
652                 if (!reply)
653                         return 1;
654
655                 c = reply;
656                 if (VECTOR_SIZE(vecs->mpvec) > 0)
657                         c += snprint_map_header(c, reply + maxlen - c,
658                                                 PRINT_MAP_FAILBACK);
659
660                 vector_foreach_slot(vecs->mpvec, mpp, i)
661                         c += snprint_map(c, reply + maxlen - c,
662                                          PRINT_MAP_FAILBACK, mpp);
663
664                 again = ((c - reply) == (maxlen - 1));
665
666                 if (again)
667                         reply = REALLOC(reply, maxlen *= 2);
668         }
669         *r = reply;
670         *len = (int)(c - reply + 1);
671         return 0;
672 }
673
674 int
675 dump_pathvec (char ** r, int * len, struct vectors * vecs)
676 {
677         int i;
678         struct path * pp;
679         char * reply;
680         char * p;
681
682         *len = VECTOR_SIZE(vecs->pathvec) * sizeof(struct path);
683         reply = (char *)MALLOC(*len);
684         *r = reply;
685
686         if (!reply)
687                 return 1;
688
689         p = reply;
690
691         vector_foreach_slot (vecs->pathvec, pp, i) {
692                 memcpy((void *)p, pp, sizeof(struct path));
693                 p += sizeof(struct path);
694         }
695
696         /* return negative to hint caller not to add "ok" to the dump */
697         return -1;
698 }
699
700 static int
701 map_discovery (struct vectors * vecs)
702 {
703         int i;
704         struct multipath * mpp;
705
706         if (dm_get_maps(vecs->mpvec, "multipath"))
707                 return 1;
708
709         vector_foreach_slot (vecs->mpvec, mpp, i) {
710                 if (setup_multipath(vecs, mpp))
711                         return 1;
712                 if (start_waiter_thread(mpp, vecs))
713                         return 1;
714         }
715
716         return 0;
717 }
718
719 int
720 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
721 {
722         struct vectors * vecs;
723         int r;
724         
725         *reply = NULL;
726         *len = 0;
727         vecs = (struct vectors *)trigger_data;
728
729         pthread_cleanup_push(cleanup_lock, vecs->lock);
730         lock(vecs->lock);
731
732         r = parse_cmd(str, reply, len, vecs);
733
734         if (r > 0) {
735                 *reply = STRDUP("fail\n");
736                 *len = strlen(*reply) + 1;
737                 r = 1;
738         }
739         else if (!r && *len == 0) {
740                 *reply = STRDUP("ok\n");
741                 *len = strlen(*reply) + 1;
742                 r = 0;
743         }
744         /* else if (r < 0) leave *reply alone */
745
746         lock_cleanup_pop(vecs->lock);
747         return r;
748 }
749
/*
 * Filter uevents by device path.
 *
 * keep only block devices, discard partitions
 *
 * Returns 1 when the event must be discarded: the path does not start
 * with "/block/<name>", or it has a further component after the device
 * name.  Returns 0 when the event is to be processed.
 */
static int
uev_discard(char * devpath)
{
	/*
	 * The field width of %10s and %10[^/] does not count the
	 * terminating NUL that sscanf() appends: the buffers need one
	 * byte more than the width.
	 */
	char a[11], b[11];

	if (sscanf(devpath, "/block/%10s", a) != 1 ||
	    sscanf(devpath, "/block/%10[^/]/%10s", a, b) == 2) {
		condlog(4, "discard event on %s", devpath);
		return 1;
	}
	return 0;
}
765
766 int 
767 uev_trigger (struct uevent * uev, void * trigger_data)
768 {
769         int r = 0;
770         char devname[32];
771         struct vectors * vecs;
772
773         vecs = (struct vectors *)trigger_data;
774
775         if (uev_discard(uev->devpath))
776                 return 1;
777
778         basename(uev->devpath, devname);
779         lock(vecs->lock);
780
781         /*
782          * device map add/remove event
783          */
784         if (!strncmp(devname, "dm-", 3)) {
785                 if (!strncmp(uev->action, "add", 3)) {
786                         r = uev_add_map(devname, vecs);
787                         goto out;
788                 }
789 #if 0
790                 if (!strncmp(uev->action, "remove", 6)) {
791                         r = uev_remove_map(devname, vecs);
792                         goto out;
793                 }
794 #endif
795                 goto out;
796         }
797         
798         /*
799          * path add/remove event
800          */
801         if (blacklist(conf->blist, devname))
802                 goto out;
803
804         if (!strncmp(uev->action, "add", 3)) {
805                 r = uev_add_path(devname, vecs);
806                 goto out;
807         }
808         if (!strncmp(uev->action, "remove", 6)) {
809                 r = uev_remove_path(devname, vecs);
810                 goto out;
811         }
812
813 out:
814         unlock(vecs->lock);
815         return r;
816 }
817
818 static void *
819 ueventloop (void * ap)
820 {
821         if (uevent_listen(&uev_trigger, ap))
822                 fprintf(stderr, "error starting uevent listener");
823                 
824         return NULL;
825 }
826
827 static void *
828 uxlsnrloop (void * ap)
829 {
830         if (load_keys())
831                 return NULL;
832         
833         if (alloc_handlers())
834                 return NULL;
835
836         add_handler(LIST+PATHS, cli_list_paths);
837         add_handler(LIST+MAPS, cli_list_maps);
838         add_handler(ADD+PATH, cli_add_path);
839         add_handler(DEL+PATH, cli_del_path);
840         add_handler(ADD+MAP, cli_add_map);
841         add_handler(DEL+MAP, cli_del_map);
842         add_handler(SWITCH+MAP+GROUP, cli_switch_group);
843         add_handler(DUMP+PATHVEC, cli_dump_pathvec);
844         add_handler(RECONFIGURE, cli_reconfigure);
845         add_handler(SUSPEND+MAP, cli_suspend);
846         add_handler(RESUME+MAP, cli_resume);
847         add_handler(REINSTATE+PATH, cli_reinstate);
848         add_handler(FAIL+PATH, cli_fail);
849
850         uxsock_listen(&uxsock_trigger, ap);
851
852         return NULL;
853 }
854
855 static int
856 exit_daemon (int status)
857 {
858         if (status != 0)
859                 fprintf(stderr, "bad exit status. see daemon.log\n");
860
861         condlog(3, "unlink pidfile");
862         unlink(DEFAULT_PIDFILE);
863
864         lock(&exit_mutex);
865         pthread_cond_signal(&exit_cond);
866         unlock(&exit_mutex);
867
868         return status;
869 }
870
871 static void
872 fail_path (struct path * pp)
873 {
874         if (!pp->mpp)
875                 return;
876
877         condlog(2, "checker failed path %s in map %s",
878                  pp->dev_t, pp->mpp->alias);
879
880         dm_fail_path(pp->mpp->alias, pp->dev_t);
881         update_queue_mode_del_path(pp->mpp);
882 }
883
884 /*
885  * caller must have locked the path list before calling that function
886  */
887 static void
888 reinstate_path (struct path * pp)
889 {
890         if (!pp->mpp)
891                 return;
892
893         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
894                 condlog(0, "%s: reinstate failed", pp->dev_t);
895         else {
896                 condlog(2, "%s: reinstated", pp->dev_t);
897                 update_queue_mode_add_path(pp->mpp);
898         }
899 }
900
901 static void
902 enable_group(struct path * pp)
903 {
904         struct pathgroup * pgp;
905
906         /*
907          * if path is added through uev_add_path, pgindex can be unset.
908          * next update_strings() will set it, upon map reload event.
909          *
910          * we can safely return here, because upon map reload, all
911          * PG will be enabled.
912          */
913         if (!pp->mpp->pg || !pp->pgindex)
914                 return;
915
916         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
917         
918         if (pgp->status == PGSTATE_DISABLED) {
919                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
920                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
921         }
922 }
923
924 static void
925 mpvec_garbage_collector (struct vectors * vecs)
926 {
927         struct multipath * mpp;
928         int i;
929
930         vector_foreach_slot (vecs->mpvec, mpp, i) {
931                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
932                         condlog(2, "%s: remove dead map", mpp->alias);
933                         remove_map(mpp, vecs, stop_waiter_thread, 1);
934                         i--;
935                 }
936         }
937 }
938
939 static void
940 defered_failback_tick (vector mpvec)
941 {
942         struct multipath * mpp;
943         int i;
944
945         vector_foreach_slot (mpvec, mpp, i) {
946                 /*
947                  * defered failback getting sooner
948                  */
949                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
950                         mpp->failback_tick--;
951
952                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
953                                 switch_pathgroup(mpp);
954                 }
955         }
956 }
957
958 static void
959 retry_count_tick(vector mpvec)
960 {
961         struct multipath *mpp;
962         int i;
963
964         vector_foreach_slot (mpvec, mpp, i) {
965                 if (mpp->retry_tick) {
966                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
967                         if(--mpp->retry_tick == 0) {
968                                 dm_queue_if_no_path(mpp->alias, 0);
969                                 condlog(2, "%s: Disable queueing", mpp->alias);
970                         }
971                 }
972         }
973 }
974
975 static void *
976 checkerloop (void *ap)
977 {
978         struct vectors *vecs;
979         struct path *pp;
980         int i, count = 0;
981         int newstate;
982         char checker_msg[MAX_CHECKER_MSG_SIZE];
983
984         mlockall(MCL_CURRENT | MCL_FUTURE);
985
986         memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
987         vecs = (struct vectors *)ap;
988
989         condlog(2, "path checkers start up");
990
991         /*
992          * init the path check interval
993          */
994         vector_foreach_slot (vecs->pathvec, pp, i) {
995                 pp->checkint = conf->checkint;
996         }
997
998         while (1) {
999                 pthread_cleanup_push(cleanup_lock, vecs->lock);
1000                 lock(vecs->lock);
1001                 condlog(4, "tick");
1002
1003                 vector_foreach_slot (vecs->pathvec, pp, i) {
1004                         if (!pp->mpp)
1005                                 continue;
1006
1007                         if (pp->tick && --pp->tick)
1008                                 continue; /* don't check this path yet */
1009
1010                         /*
1011                          * provision a next check soonest,
1012                          * in case we exit abnormaly from here
1013                          */
1014                         pp->tick = conf->checkint;
1015                         
1016                         if (!pp->checkfn) {
1017                                 pathinfo(pp, conf->hwtable, DI_SYSFS);
1018                                 select_checkfn(pp);
1019                         }
1020
1021                         if (!pp->checkfn) {
1022                                 condlog(0, "%s: checkfn is void", pp->dev);
1023                                 continue;
1024                         }
1025                         newstate = pp->checkfn(pp->fd, checker_msg,
1026                                                &pp->checker_context);
1027                         
1028                         if (newstate < 0) {
1029                                 condlog(2, "%s: unusable path", pp->dev);
1030                                 pathinfo(pp, conf->hwtable, 0);
1031                                 continue;
1032                         }
1033
1034                         if (newstate != pp->state) {
1035                                 pp->state = newstate;
1036                                 LOG_MSG(1, checker_msg);
1037
1038                                 /*
1039                                  * upon state change, reset the checkint
1040                                  * to the shortest delay
1041                                  */
1042                                 pp->checkint = conf->checkint;
1043
1044                                 if (newstate == PATH_DOWN ||
1045                                     newstate == PATH_SHAKY ||
1046                                     update_multipath_strings(pp->mpp,
1047                                                              vecs->pathvec)) {
1048                                         /*
1049                                          * proactively fail path in the DM
1050                                          */
1051                                         fail_path(pp);
1052
1053                                         /*
1054                                          * cancel scheduled failback
1055                                          */
1056                                         pp->mpp->failback_tick = 0;
1057
1058                                         continue;
1059                                 }
1060
1061                                 /*
1062                                  * reinstate this path
1063                                  */
1064                                 reinstate_path(pp);
1065
1066                                 /*
1067                                  * schedule [defered] failback
1068                                  */
1069                                 if (pp->mpp->pgfailback > 0)
1070                                         pp->mpp->failback_tick =
1071                                                 pp->mpp->pgfailback + 1;
1072                                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
1073                                     need_switch_pathgroup(pp->mpp, 1))
1074                                         switch_pathgroup(pp->mpp);
1075
1076                                 /*
1077                                  * if at least one path is up in a group, and
1078                                  * the group is disabled, re-enable it
1079                                  */
1080                                 if (newstate == PATH_UP)
1081                                         enable_group(pp);
1082                         }
1083                         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1084                                 LOG_MSG(4, checker_msg);
1085                                 /*
1086                                  * double the next check delay.
1087                                  * max at conf->max_checkint
1088                                  */
1089                                 if (pp->checkint < (conf->max_checkint / 2))
1090                                         pp->checkint = 2 * pp->checkint;
1091                                 else
1092                                         pp->checkint = conf->max_checkint;
1093
1094                                 pp->tick = pp->checkint;
1095                                 condlog(4, "%s: delay next check %is",
1096                                                 pp->dev_t, pp->tick);
1097
1098                         }
1099                         pp->state = newstate;
1100
1101                         /*
1102                          * path prio refreshing
1103                          */
1104                         condlog(4, "path prio refresh");
1105                         pathinfo(pp, conf->hwtable, DI_PRIO);
1106
1107                         if (need_switch_pathgroup(pp->mpp, 0)) {
1108                                 if (pp->mpp->pgfailback > 0)
1109                                         pp->mpp->failback_tick =
1110                                                 pp->mpp->pgfailback + 1;
1111                                 else if (pp->mpp->pgfailback ==
1112                                                 -FAILBACK_IMMEDIATE)
1113                                         switch_pathgroup(pp->mpp);
1114                         }
1115                 }
1116                 defered_failback_tick(vecs->mpvec);
1117                 retry_count_tick(vecs->mpvec);
1118
1119                 if (count)
1120                         count--;
1121                 else {
1122                         condlog(4, "map garbage collection");
1123                         mpvec_garbage_collector(vecs);
1124                         count = MAPGCINT;
1125                 }
1126                 
1127                 lock_cleanup_pop(vecs->lock);
1128                 sleep(1);
1129         }
1130         return NULL;
1131 }
1132
1133 int
1134 configure (struct vectors * vecs, int start_waiters)
1135 {
1136         struct multipath * mpp;
1137         struct path * pp;
1138         vector mpvec;
1139         int i;
1140
1141         if (!(vecs->pathvec = vector_alloc()))
1142                 return 1;
1143         
1144         if (!(vecs->mpvec = vector_alloc()))
1145                 return 1;
1146         
1147         if (!(mpvec = vector_alloc()))
1148                 return 1;
1149
1150         /*
1151          * probe for current path (from sysfs) and map (from dm) sets
1152          */
1153         path_discovery(vecs->pathvec, conf, DI_ALL);
1154
1155         vector_foreach_slot (vecs->pathvec, pp, i)
1156                 pp->checkint = conf->checkint;
1157
1158         if (map_discovery(vecs))
1159                 return 1;
1160
1161         /*
1162          * create new set of maps & push changed ones into dm
1163          */
1164         if (coalesce_paths(vecs, mpvec))
1165                 return 1;
1166
1167         /*
1168          * may need to remove some maps which are no longer relevant
1169          * e.g., due to blacklist changes in conf file
1170          */
1171         if (coalesce_maps(vecs, mpvec))
1172                 return 1;
1173
1174         if (conf->verbosity > 2)
1175                 vector_foreach_slot(mpvec, mpp, i)
1176                         print_map(mpp);
1177
1178         /*
1179          * purge dm of old maps
1180          */
1181         remove_maps(vecs, NULL);
1182
1183         /*
1184          * save new set of maps formed by considering current path state
1185          */
1186         vecs->mpvec = mpvec;
1187
1188         /*
1189          * start dm event waiter threads for these new maps
1190          */
1191         vector_foreach_slot(vecs->mpvec, mpp, i) {
1192                 if (setup_multipath(vecs, mpp))
1193                         return 1;
1194                 if (start_waiters)
1195                         if (start_waiter_thread(mpp, vecs))
1196                                 return 1;
1197         }
1198         return 0;
1199 }
1200
1201 int
1202 reconfigure (struct vectors * vecs)
1203 {
1204         struct config * old = conf;
1205
1206         condlog(0, "reconfigure");
1207
1208         /*
1209          * free old map and path vectors ... they use old conf state
1210          */
1211         if (VECTOR_SIZE(vecs->mpvec))
1212                 remove_maps(vecs, stop_waiter_thread);
1213
1214         if (VECTOR_SIZE(vecs->pathvec))
1215                 free_pathvec(vecs->pathvec, FREE_PATHS);
1216
1217         conf = NULL;
1218
1219         if (load_config(DEFAULT_CONFIGFILE))
1220                 return 1;
1221
1222         conf->verbosity = old->verbosity;
1223
1224         if (!conf->checkint) {
1225                 conf->checkint = CHECKINT;
1226                 conf->max_checkint = MAX_CHECKINT;
1227         }
1228         configure(vecs, 1);
1229         free_config(old);
1230         return 0;
1231 }
1232
1233 static struct vectors *
1234 init_vecs (void)
1235 {
1236         struct vectors * vecs;
1237
1238         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1239
1240         if (!vecs)
1241                 return NULL;
1242
1243         vecs->lock = 
1244                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1245
1246         if (!vecs->lock)
1247                 goto out;
1248
1249         vecs->pathvec = vector_alloc();
1250
1251         if (!vecs->pathvec)
1252                 goto out1;
1253                 
1254         vecs->mpvec = vector_alloc();
1255
1256         if (!vecs->mpvec)
1257                 goto out2;
1258         
1259         pthread_mutex_init(vecs->lock, NULL);
1260
1261         return vecs;
1262
1263 out2:
1264         vector_free(vecs->pathvec);
1265 out1:
1266         FREE(vecs->lock);
1267 out:
1268         FREE(vecs);
1269         condlog(0, "failed to init paths");
1270         return NULL;
1271 }
1272
/*
 * Install func as the handler for signal signo, with an empty signal
 * mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when sigaction()
 * fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction action;
	struct sigaction previous;

	action.sa_handler = func;
	action.sa_flags = 0;
	sigemptyset(&action.sa_mask);

	if (sigaction(signo, &action, &previous) < 0)
		return (SIG_ERR);

	return (previous.sa_handler);
}
1291
1292 static void
1293 sighup (int sig)
1294 {
1295         condlog(3, "SIGHUP received");
1296
1297         lock(gvecs->lock);
1298         reconfigure(gvecs);
1299         unlock(gvecs->lock);
1300
1301 #ifdef _DEBUG_
1302         dbg_free_final(NULL);
1303 #endif
1304 }
1305
/*
 * Termination signal handler: hand over to exit_daemon() with status 0.
 */
static void
sigend (int sig)
{
	(void)sig;	/* same action whichever signal brought us here */

	exit_daemon(0);
}
1311
/*
 * SIGUSR1 handler: only logs that the signal arrived.
 */
static void
sigusr1 (int sig)
{
	(void)sig;

	condlog(3, "SIGUSR1 received");
}
1317
1318 static void
1319 signal_init(void)
1320 {
1321         signal_set(SIGHUP, sighup);
1322         signal_set(SIGUSR1, sigusr1);
1323         signal_set(SIGINT, sigend);
1324         signal_set(SIGTERM, sigend);
1325         signal_set(SIGKILL, sigend);
1326 }
1327
/*
 * Ask for round-robin real-time scheduling at priority 99 for the
 * calling process.  Failure is only logged; the daemon keeps running
 * with its current scheduling policy.
 */
static void
setscheduler (void)
{
	/* C99 designated initializer; the "field:" form is an obsolete GNU extension */
	static const struct sched_param sched_param = {
		.sched_priority = 99
	};

	/*
	 * NOTE(review): other condlog() calls in this file pass small
	 * verbosity levels (0-4); LOG_WARNING is a syslog priority -
	 * confirm it is the intended level here.
	 */
	if (sched_setscheduler(0, SCHED_RR, &sched_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1342
/*
 * Write val to /proc/self/oom_adj.
 *
 * Best effort: nothing happens when the file cannot be opened, and the
 * result of the write is not checked.
 */
static void
set_oom_adj (int val)
{
	FILE *oom_file = fopen("/proc/self/oom_adj", "w");

	if (oom_file) {
		fprintf(oom_file, "%i", val);
		fclose(oom_file);
	}
}
1356         
1357 static int
1358 child (void * param)
1359 {
1360         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1361         pthread_attr_t attr;
1362         struct vectors * vecs;
1363
1364         mlockall(MCL_CURRENT | MCL_FUTURE);
1365
1366         if (logsink)
1367                 log_thread_start();
1368
1369         condlog(2, "--------start up--------");
1370         condlog(2, "read " DEFAULT_CONFIGFILE);
1371
1372         if (load_config(DEFAULT_CONFIGFILE))
1373                 exit(1);
1374
1375         setlogmask(LOG_UPTO(conf->verbosity + 3));
1376
1377         /*
1378          * fill the voids left in the config file
1379          */
1380         if (!conf->checkint) {
1381                 conf->checkint = CHECKINT;
1382                 conf->max_checkint = MAX_CHECKINT;
1383         }
1384
1385         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1386                 if (logsink)
1387                         log_thread_stop();
1388
1389                 exit(1);
1390         }
1391         signal_init();
1392         setscheduler();
1393         set_oom_adj(-17);
1394         vecs = gvecs = init_vecs();
1395
1396         if (!vecs)
1397                 exit(1);
1398
1399         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1400                 condlog(0, "can not find sysfs mount point");
1401                 exit(1);
1402         }
1403
1404         /*
1405          * fetch and configure both paths and multipaths
1406          */
1407         if (configure(vecs, 1)) {
1408                 condlog(0, "failure during configuration");
1409                 exit(1);
1410         }
1411
1412         /*
1413          * start threads
1414          */
1415         pthread_attr_init(&attr);
1416         pthread_attr_setstacksize(&attr, 64 * 1024);
1417         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
1418         
1419         pthread_create(&check_thr, &attr, checkerloop, vecs);
1420         pthread_create(&uevent_thr, &attr, ueventloop, vecs);
1421         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, vecs);
1422
1423         pthread_cond_wait(&exit_cond, &exit_mutex);
1424
1425         /*
1426          * exit path
1427          */
1428         lock(vecs->lock);
1429         remove_maps(vecs, stop_waiter_thread);
1430         free_pathvec(vecs->pathvec, FREE_PATHS);
1431
1432         pthread_cancel(check_thr);
1433         pthread_cancel(uevent_thr);
1434         pthread_cancel(uxlsnr_thr);
1435
1436         free_keys(keys);
1437         keys = NULL;
1438         free_handlers(handlers);
1439         handlers = NULL;
1440         free_polls();
1441
1442         unlock(vecs->lock);
1443         pthread_mutex_destroy(vecs->lock);
1444         FREE(vecs->lock);
1445         vecs->lock = NULL;
1446         FREE(vecs);
1447         vecs = NULL;
1448         free_config(conf);
1449         conf = NULL;
1450
1451         condlog(2, "--------shut down-------");
1452         
1453         if (logsink)
1454                 log_thread_stop();
1455
1456         dm_lib_release();
1457         dm_lib_exit();
1458
1459 #ifdef _DEBUG_
1460         dbg_free_final(NULL);
1461 #endif
1462
1463         exit(0);
1464 }
1465
/*
 * Detach from the controlling terminal with the usual double fork.
 *
 * Returns:
 *   > 0  in the original process (pid of the first child)
 *   0    in the process that carries on as the daemon
 *   -1   when the first fork fails
 *
 * The intermediate child exits after the second fork.  In the daemon,
 * stdin is redirected to /dev/null and stdout/stderr to /dev/console,
 * the working directory becomes "/" and the umask is cleared.  If the
 * redirection cannot be set up the process exits with status 1.
 *
 * A failed second fork is only reported; the first child then carries
 * on as the daemon.
 */
static int
daemonize(void)
{
	int pid;
	int in_fd, out_fd;

	if ((pid = fork()) < 0) {
		fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
		return -1;
	}
	else if (pid != 0)
		return pid;

	setsid();

	if ((pid = fork()) < 0)
		fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
	else if (pid != 0)
		_exit(0);

	in_fd = open("/dev/null", O_RDONLY);
	if (in_fd < 0) {
		fprintf(stderr, "cannot open /dev/null for input : %s\n",
			strerror(errno));
		_exit(1);
	}
	out_fd = open("/dev/console", O_WRONLY);
	if (out_fd < 0) {
		fprintf(stderr, "cannot open /dev/console for output : %s\n",
			strerror(errno));
		_exit(1);
	}

	/*
	 * dup2() names its target explicitly.  close() followed by dup()
	 * relies on the lowest free descriptor being the one just closed,
	 * which does not hold when the daemon is started with one of the
	 * standard descriptors already closed.
	 */
	if (dup2(in_fd, STDIN_FILENO) < 0 ||
	    dup2(out_fd, STDOUT_FILENO) < 0 ||
	    dup2(out_fd, STDERR_FILENO) < 0) {
		fprintf(stderr, "cannot redirect standard streams : %s\n",
			strerror(errno));
		_exit(1);
	}

	/*
	 * open() may have handed out one of 0/1/2 itself; those now are
	 * the standard streams and must stay open
	 */
	if (in_fd > STDERR_FILENO)
		close(in_fd);
	if (out_fd > STDERR_FILENO)
		close(out_fd);

	if (chdir("/") < 0)
		fprintf(stderr, "cannot chdir to / : %s\n", strerror(errno));
	umask(0);
	return 0;
}
1512
1513 int
1514 main (int argc, char *argv[])
1515 {
1516         extern char *optarg;
1517         extern int optind;
1518         int arg;
1519         int err;
1520         
1521         logsink = 1;
1522
1523         if (getuid() != 0) {
1524                 fprintf(stderr, "need to be root\n");
1525                 exit(1);
1526         }
1527
1528         /* make sure we don't lock any path */
1529         chdir("/");
1530         umask(umask(077) | 022);
1531
1532         conf = alloc_config();
1533
1534         if (!conf)
1535                 exit(1);
1536
1537         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1538         switch(arg) {
1539                 case 'd':
1540                         logsink = 0;
1541                         //debug=1; /* ### comment me out ### */
1542                         break;
1543                 case 'v':
1544                         if (sizeof(optarg) > sizeof(char *) ||
1545                             !isdigit(optarg[0]))
1546                                 exit(1);
1547
1548                         conf->verbosity = atoi(optarg);
1549                         break;
1550                 case 'k':
1551                         uxclnt(optarg);
1552                         exit(0);
1553                 default:
1554                         ;
1555                 }
1556         }
1557
1558         if (!logsink)
1559                 err = 0;
1560         else
1561                 err = daemonize();
1562         
1563         if (err < 0)
1564                 /* error */
1565                 exit(1);
1566         else if (err > 0)
1567                 /* parent dies */
1568                 exit(0);
1569         else
1570                 /* child lives */
1571                 return (child(NULL));
1572 }