[multipathd] remove daemonize() unused variable
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 #include <unistd.h>
2 #include <sys/stat.h>
3 #include <libdevmapper.h>
4 #include <wait.h>
5 #include <sys/mman.h>
6 #include <sys/types.h>
7 #include <fcntl.h>
8 #include <errno.h>
9
10 /*
11  * libsysfs
12  */
13 #include <sysfs/libsysfs.h>
14 #include <sysfs/dlist.h>
15
16 /*
17  * libcheckers
18  */
19 #include <checkers.h>
20 #include <path_state.h>
21
22 /*
23  * libmultipath
24  */
25 #include <parser.h>
26 #include <vector.h>
27 #include <memory.h>
28 #include <config.h>
29 #include <callout.h>
30 #include <util.h>
31 #include <blacklist.h>
32 #include <hwtable.h>
33 #include <defaults.h>
34 #include <structs.h>
35 #include <dmparser.h>
36 #include <devmapper.h>
37 #include <dict.h>
38 #include <discovery.h>
39 #include <debug.h>
40 #include <propsel.h>
41 #include <uevent.h>
42 #include <switchgroup.h>
43 #include <path_state.h>
44 #include <print.h>
45
46 #include "main.h"
47 #include "pidfile.h"
48 #include "uxlsnr.h"
49 #include "uxclnt.h"
50 #include "cli.h"
51 #include "cli_handlers.h"
52
53 #define FILE_NAME_SIZE 256
54 #define CMDSIZE 160
55
/*
 * Log the checker message held in buffer `b` at verbosity `a`, prefixed
 * with the dev_t of the path `pp` that must be in scope at the call site,
 * then wipe the buffer.  Nothing happens when the buffer is empty.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and cannot swallow an `else` that follows it.
 */
#define LOG_MSG(a,b) \
        do { \
                if (strlen(b)) { \
                        condlog(a, "%s: %s", pp->dev_t, b); \
                        memset(b, 0, MAX_CHECKER_MSG_SIZE); \
                } \
        } while (0)
61
/*
 * Mutex helpers.
 *
 * With LCKDBG defined every lock/unlock is traced on stderr with the
 * file, function and line of the call site.  The traced lock() and
 * unlock() are wrapped in do { } while (0) so they remain a single
 * statement, e.g. as the body of an unbraced `if`.
 *
 * lock_cleanup_pop() cannot be wrapped that way: pthread_cleanup_pop()
 * has to stay in the same lexical scope as its pthread_cleanup_push().
 */
#ifdef LCKDBG
#define lock(a) \
        do { \
                fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
                pthread_mutex_lock(a); \
        } while (0)
#define unlock(a) \
        do { \
                fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
                pthread_mutex_unlock(a); \
        } while (0)
#define lock_cleanup_pop(a) \
        fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
        pthread_cleanup_pop(1);
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#define lock_cleanup_pop(a) pthread_cleanup_pop(1);
#endif
77
78 pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
79 pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
80
81 /*
82  * structs
83  */
84 struct event_thread {
85         struct dm_task *dmt;
86         pthread_t thread;
87         int event_nr;
88         char mapname[WWID_SIZE];
89         struct vectors *vecs;
90 };
91
92 static struct event_thread *
93 alloc_waiter (void)
94 {
95
96         struct event_thread * wp;
97
98         wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
99
100         return wp;
101 }
102
103 static void
104 free_waiter (void * data)
105 {
106         struct event_thread * wp = (struct event_thread *)data;
107
108         if (wp->dmt)
109                 dm_task_destroy(wp->dmt);
110         FREE(wp);
111 }
112
113 static void
114 stop_waiter_thread (struct multipath * mpp, struct vectors * vecs)
115 {
116         struct event_thread * wp = (struct event_thread *)mpp->waiter;
117         pthread_t thread;
118         
119         if (!wp) {
120                 condlog(3, "%s: no waiter thread", mpp->alias);
121                 return;
122         }
123         thread = wp->thread;
124
125         if (!wp) {
126                 condlog(3, "%s: thread not started", mpp->alias);
127                 return;
128         }
129         condlog(2, "%s: stop event checker thread", wp->mapname);
130         pthread_kill(thread, SIGHUP);
131 }
132
/*
 * pthread cleanup handler: unlock the mutex passed as opaque data.
 */
static void
cleanup_lock (void * data)
{
        pthread_mutex_t * held = (pthread_mutex_t *)data;

        pthread_mutex_unlock(held);
}
138
139 static void
140 adopt_paths (struct vectors * vecs, struct multipath * mpp)
141 {
142         int i;
143         struct path * pp;
144
145         if (!mpp)
146                 return;
147
148         vector_foreach_slot (vecs->pathvec, pp, i) {
149                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
150                         condlog(4, "%s ownership set", pp->dev_t);
151                         pp->mpp = mpp;
152                 }
153         }
154 }
155
156 static void
157 orphan_path (struct path * pp)
158 {
159         pp->mpp = NULL;
160         pp->checkfn = NULL;
161         pp->dmstate = PSTATE_UNDEF;
162         pp->checker_context = NULL;
163         pp->getuid = NULL;
164         pp->getprio = NULL;
165
166         if (pp->fd >= 0)
167                 close(pp->fd);
168
169         pp->fd = -1;
170 }
171
172 static void
173 orphan_paths (struct vectors * vecs, struct multipath * mpp)
174 {
175         int i;
176         struct path * pp;
177
178         vector_foreach_slot (vecs->pathvec, pp, i) {
179                 if (pp->mpp == mpp) {
180                         condlog(4, "%s is orphaned", pp->dev_t);
181                         orphan_path(pp);
182                 }
183         }
184 }
185
186 static int
187 update_multipath_table (struct multipath *mpp, vector pathvec)
188 {
189         if (!mpp)
190                 return 1;
191
192         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
193                 return 1;
194
195         if (disassemble_map(pathvec, mpp->params, mpp))
196                 return 1;
197
198         return 0;
199 }
200
201 static int
202 update_multipath_status (struct multipath *mpp)
203 {
204         if (!mpp)
205                 return 1;
206
207         if(dm_get_status(mpp->alias, mpp->status))
208                 return 1;
209
210         if (disassemble_status(mpp->status, mpp))
211                 return 1;
212
213         return 0;
214 }
215
216 static int
217 update_multipath_strings (struct multipath *mpp, vector pathvec)
218 {
219         if (mpp->selector) {
220                 FREE(mpp->selector);
221                 mpp->selector = NULL;
222         }
223
224         if (mpp->features) {
225                 FREE(mpp->features);
226                 mpp->features = NULL;
227         }
228
229         if (mpp->hwhandler) {
230                 FREE(mpp->hwhandler);
231                 mpp->hwhandler = NULL;
232         }
233
234         free_pgvec(mpp->pg, KEEP_PATHS);
235         mpp->pg = NULL;
236
237         if (update_multipath_table(mpp, pathvec))
238                 return 1;
239
240         if (update_multipath_status(mpp))
241                 return 1;
242
243         return 0;
244 }
245
246 static void
247 set_multipath_wwid (struct multipath * mpp)
248 {
249         if (mpp->wwid)
250                 return;
251
252         dm_get_uuid(mpp->alias, mpp->wwid);
253 }
254
255 static int
256 pathcount (struct multipath *mpp, int state)
257 {
258         struct pathgroup *pgp;
259         struct path *pp;
260         int i, j;
261         int count = 0;
262
263         vector_foreach_slot (mpp->pg, pgp, i)
264                 vector_foreach_slot (pgp->paths, pp, j)
265                         if (pp->state == state)
266                                 count++;
267         return count;
268 }
269
270 /*
271  * mpp->no_path_retry:
272  *   -2 (QUEUE) : queue_if_no_path enabled, never turned off
273  *   -1 (FAIL)  : fail_if_no_path
274  *    0 (UNDEF) : nothing
275  *   >0         : queue_if_no_path enabled, turned off after polling n times
276  */
277 static void
278 update_queue_mode_del_path(struct multipath *mpp)
279 {
280         if (--mpp->nr_active == 0 && mpp->no_path_retry > 0) {
281                 /*
282                  * Enter retry mode.
283                  * meaning of +1: retry_tick may be decremented in
284                  *                checkerloop before starting retry.
285                  */
286                 mpp->retry_tick = mpp->no_path_retry * conf->checkint + 1;
287                 condlog(1, "%s: Entering recovery mode: max_retries=%d",
288                         mpp->alias, mpp->no_path_retry);
289         }
290         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
291 }
292
293 static void
294 update_queue_mode_add_path(struct multipath *mpp)
295 {
296         if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
297                 /* come back to normal mode from retry mode */
298                 mpp->retry_tick = 0;
299                 dm_queue_if_no_path(mpp->alias, 1);
300                 condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
301                 condlog(1, "%s: Recovered to normal mode", mpp->alias);
302         }
303         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
304 }
305
306 static void
307 set_no_path_retry(struct multipath *mpp)
308 {
309         mpp->retry_tick = 0;
310         mpp->nr_active = pathcount(mpp, PATH_UP);
311         select_no_path_retry(mpp);
312
313         switch (mpp->no_path_retry) {
314         case NO_PATH_RETRY_UNDEF:
315                 break;
316         case NO_PATH_RETRY_FAIL:
317                 dm_queue_if_no_path(mpp->alias, 0);
318                 break;
319         case NO_PATH_RETRY_QUEUE:
320                 dm_queue_if_no_path(mpp->alias, 1);
321                 break;
322         default:
323                 dm_queue_if_no_path(mpp->alias, 1);
324                 if (mpp->nr_active == 0) {
325                         /* Enter retry mode */
326                         mpp->retry_tick = mpp->no_path_retry * conf->checkint;
327                         condlog(1, "%s: Entering recovery mode: max_retries=%d",
328                                 mpp->alias, mpp->no_path_retry);
329                 }
330                 break;
331         }
332 }
333
334 static struct hwentry *
335 extract_hwe_from_path(struct multipath * mpp)
336 {
337         struct path * pp;
338         struct pathgroup * pgp;
339
340         pgp = VECTOR_SLOT(mpp->pg, 0);
341         pp = VECTOR_SLOT(pgp->paths, 0);
342
343         return pp->hwe;
344 }
345
346 static void
347 remove_map (struct multipath * mpp, struct vectors * vecs)
348 {
349         int i;
350
351         stop_waiter_thread(mpp, vecs);
352
353         /*
354          * clear references to this map
355          */
356         orphan_paths(vecs, mpp);
357
358         /*
359          * purge the multipath vector
360          */
361         i = find_slot(vecs->mpvec, (void *)mpp);
362         vector_del_slot(vecs->mpvec, i);
363
364         /*
365          * final free
366          */
367         free_multipath(mpp, KEEP_PATHS);
368         mpp = NULL;
369 }
370
371 static void
372 remove_maps (struct vectors * vecs)
373 {
374         int i;
375         struct multipath * mpp;
376
377         vector_foreach_slot (vecs->mpvec, mpp, i) {
378                 remove_map(mpp, vecs);
379                 i--;
380         }
381
382         vector_free(vecs->mpvec);
383         vecs->mpvec = NULL;
384 }
385
386 static int
387 setup_multipath (struct vectors * vecs, struct multipath * mpp)
388 {
389         if (dm_get_info(mpp->alias, &mpp->dmi))
390                 goto out;
391
392         set_multipath_wwid(mpp);
393         mpp->mpe = find_mpe(mpp->wwid);
394         condlog(4, "discovered map %s", mpp->alias);
395
396         if (update_multipath_strings(mpp, vecs->pathvec))
397                 goto out;
398
399         adopt_paths(vecs, mpp);
400         mpp->hwe = extract_hwe_from_path(mpp);
401         select_pgfailback(mpp);
402         set_no_path_retry(mpp);
403
404         return 0;
405 out:
406         condlog(0, "%s: failed to setup multipath", mpp->alias);
407         remove_map(mpp, vecs);
408         return 1;
409 }
410
411 static int
412 need_switch_pathgroup (struct multipath * mpp, int refresh)
413 {
414         struct pathgroup * pgp;
415         struct path * pp;
416         int i, j;
417
418         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
419                 return 0;
420
421         /*
422          * Refresh path priority values
423          */
424         if (refresh)
425                 vector_foreach_slot (mpp->pg, pgp, i)
426                         vector_foreach_slot (pgp->paths, pp, j)
427                                 pathinfo(pp, conf->hwtable, DI_PRIO);
428
429         mpp->bestpg = select_path_group(mpp);
430
431         if (mpp->bestpg != mpp->nextpg)
432                 return 1;
433
434         return 0;
435 }
436
437 static void
438 switch_pathgroup (struct multipath * mpp)
439 {
440         dm_switchgroup(mpp->alias, mpp->bestpg);
441         condlog(2, "%s: switch to path group #%i",
442                  mpp->alias, mpp->bestpg);
443 }
444
445 static int
446 update_multipath (struct vectors *vecs, char *mapname)
447 {
448         struct multipath *mpp;
449         struct pathgroup  *pgp;
450         struct path *pp;
451         int i, j;
452         int r = 1;
453
454         mpp = find_mp_by_alias(vecs->mpvec, mapname);
455
456         if (!mpp)
457                 goto out;
458
459         free_pgvec(mpp->pg, KEEP_PATHS);
460         mpp->pg = NULL;
461
462         if (setup_multipath(vecs, mpp))
463                 goto out; /* mpp freed in setup_multipath */
464
465         /*
466          * compare checkers states with DM states
467          */
468         vector_foreach_slot (mpp->pg, pgp, i) {
469                 vector_foreach_slot (pgp->paths, pp, j) {
470                         if (pp->dmstate != PSTATE_FAILED)
471                                 continue;
472
473                         if (pp->state != PATH_DOWN) {
474                                 condlog(2, "%s: mark as failed", pp->dev_t);
475                                 pp->state = PATH_DOWN;
476                                 update_queue_mode_del_path(mpp);
477
478                                 /*
479                                  * if opportune,
480                                  * schedule the next check earlier
481                                  */
482                                 if (pp->tick > conf->checkint)
483                                         pp->tick = conf->checkint;
484                         }
485                 }
486         }
487         r = 0;
488 out:
489         if (r)
490                 condlog(0, "failed to update multipath");
491
492         return r;
493 }
494
/*
 * Unblock SIGHUP for the calling thread.
 *
 * Returns the signal mask that was in effect before the call so the
 * caller can restore it.
 */
static sigset_t unblock_sighup(void)
{
        sigset_t hup_only;
        sigset_t previous;

        sigemptyset(&hup_only);
        sigaddset(&hup_only, SIGHUP);

        pthread_sigmask(SIG_UNBLOCK, &hup_only, &previous);

        return previous;
}
504
505 /*
506  * returns the reschedule delay
507  * negative means *stop*
508  */
509 static int
510 waiteventloop (struct event_thread * waiter)
511 {
512         sigset_t set;
513         int event_nr;
514         int r;
515
516         if (!waiter->event_nr)
517                 waiter->event_nr = dm_geteventnr(waiter->mapname);
518
519         if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
520                 return 1;
521
522         if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
523                 dm_task_destroy(waiter->dmt);
524                 return 1;
525         }
526
527         if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
528                                                       waiter->event_nr)) {
529                 dm_task_destroy(waiter->dmt);
530                 return 1;
531         }
532
533         dm_task_no_open_count(waiter->dmt);
534         
535         /* accept wait interruption */
536         set = unblock_sighup();
537
538         /* interruption spits messages */
539         dm_shut_log();
540
541         /* wait */
542         r = dm_task_run(waiter->dmt);
543
544         /* wait is over : event or interrupt */
545         pthread_sigmask(SIG_SETMASK, &set, NULL);
546         //dm_restore_log();
547
548         if (!r) /* wait interrupted by signal */
549                 return -1;
550
551         dm_task_destroy(waiter->dmt);
552         waiter->dmt = NULL;
553         waiter->event_nr++;
554
555         /*
556          * upon event ...
557          */
558         while (1) {
559                 condlog(3, "%s: devmap event #%i",
560                                 waiter->mapname, waiter->event_nr);
561
562                 /*
563                  * event might be :
564                  *
565                  * 1) a table reload, which means our mpp structure is
566                  *    obsolete : refresh it through update_multipath()
567                  * 2) a path failed by DM : mark as such through
568                  *    update_multipath()
569                  * 3) map has gone away : stop the thread.
570                  * 4) a path reinstate : nothing to do
571                  * 5) a switch group : nothing to do
572                  */
573                 pthread_cleanup_push(cleanup_lock, waiter->vecs->lock);
574                 lock(waiter->vecs->lock);
575                 r = update_multipath(waiter->vecs, waiter->mapname);
576                 lock_cleanup_pop(waiter->vecs->lock);
577
578                 if (r)
579                         return -1; /* stop the thread */
580
581                 event_nr = dm_geteventnr(waiter->mapname);
582
583                 if (waiter->event_nr == event_nr)
584                         return 1; /* upon problem reschedule 1s later */
585
586                 waiter->event_nr = event_nr;
587         }
588         return -1; /* never reach there */
589 }
590
591 static void *
592 waitevent (void * et)
593 {
594         int r;
595         struct event_thread *waiter;
596
597         mlockall(MCL_CURRENT | MCL_FUTURE);
598
599         waiter = (struct event_thread *)et;
600         pthread_cleanup_push(free_waiter, et);
601
602         while (1) {
603                 r = waiteventloop(waiter);
604
605                 if (r < 0)
606                         break;
607
608                 sleep(r);
609         }
610
611         pthread_cleanup_pop(1);
612         return NULL;
613 }
614
615 static int
616 start_waiter_thread (struct multipath * mpp, struct vectors * vecs)
617 {
618         pthread_attr_t attr;
619         struct event_thread * wp;
620
621         if (!mpp)
622                 return 0;
623
624         if (pthread_attr_init(&attr))
625                 goto out;
626
627         pthread_attr_setstacksize(&attr, 32 * 1024);
628         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
629
630         wp = alloc_waiter();
631
632         if (!wp)
633                 goto out;
634
635         mpp->waiter = (void *)wp;
636         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
637         wp->vecs = vecs;
638
639         if (pthread_create(&wp->thread, &attr, waitevent, wp)) {
640                 condlog(0, "%s: cannot create event checker", wp->mapname);
641                 goto out1;
642         }
643         condlog(2, "%s: event checker started", wp->mapname);
644
645         return 0;
646 out1:
647         free_waiter(wp);
648         mpp->waiter = NULL;
649 out:
650         condlog(0, "failed to start waiter thread");
651         return 1;
652 }
653
654 int
655 uev_add_map (char * devname, struct vectors * vecs)
656 {
657         int major, minor;
658         char dev_t[BLK_DEV_SIZE];
659         char * alias;
660         struct multipath * mpp;
661
662         if (sscanf(devname, "dm-%d", &minor) == 1 &&
663             !sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE) &&
664             sscanf(dev_t, "%d:%d", &major, &minor) == 2)
665                 alias = dm_mapname(major, minor);
666         else
667                 alias = STRDUP(devname);
668                 
669         if (!alias)
670                 return 1;
671         
672         if (!dm_type(alias, DEFAULT_TARGET)) {
673                 condlog(4, "%s: not a multipath map", alias);
674                 FREE(alias);
675                 return 0;
676         }
677
678         mpp = find_mp_by_alias(vecs->mpvec, alias);
679
680         if (mpp) {
681                 /*
682                  * this should not happen,
683                  * we missed a remove map event (not sent ?)
684                  */
685                 condlog(2, "%s: already registered", alias);
686                 remove_map(mpp, vecs);
687         }
688
689         /*
690          * now we can allocate
691          */
692         mpp = alloc_multipath();
693
694         if (!mpp)
695                 return 1;
696
697         mpp->alias = alias;
698
699         if (setup_multipath(vecs, mpp))
700                 return 1; /* mpp freed in setup_multipath */
701
702         if (!vector_alloc_slot(vecs->mpvec))
703                 goto out;
704
705         vector_set_slot(vecs->mpvec, mpp);
706         adopt_paths(vecs, mpp);
707
708         if (start_waiter_thread(mpp, vecs))
709                 goto out;
710
711         return 0;
712 out:
713         condlog(2, "%s: add devmap failed", mpp->alias);
714         remove_map(mpp, vecs);
715         return 1;
716 }
717
718 int
719 uev_remove_map (char * devname, struct vectors * vecs)
720 {
721         int minor;
722         struct multipath * mpp;
723
724         if (sscanf(devname, "dm-%d", &minor) == 1)
725                 mpp = find_mp_by_minor(vecs->mpvec, minor);
726         else
727                 mpp = find_mp_by_alias(vecs->mpvec, devname);
728
729         if (!mpp) {
730                 condlog(3, "%s: devmap not registered, can't remove",
731                         devname);
732                 return 0;
733         }
734
735         condlog(2, "remove %s devmap", mpp->alias);
736         remove_map(mpp, vecs);
737
738         return 0;
739 }
740
741 int
742 uev_add_path (char * devname, struct vectors * vecs)
743 {
744         struct path * pp;
745
746         pp = find_path_by_dev(vecs->pathvec, devname);
747
748         if (pp) {
749                 condlog(3, "%s: already in pathvec");
750                 return 1;
751         }
752         pp = store_pathinfo(vecs->pathvec, conf->hwtable,
753                        devname, DI_SYSFS | DI_WWID);
754
755         if (!pp) {
756                 condlog(0, "%s: failed to store path info", devname);
757                 return 1;
758         }
759
760         condlog(2, "%s: path checker registered", devname);
761         pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
762
763         if (pp->mpp) {
764                 condlog(4, "%s: ownership set to %s",
765                                 pp->dev_t, pp->mpp->alias);
766         } else {
767                 condlog(4, "%s: orphaned", pp->dev_t);
768                 orphan_path(pp);
769         }
770
771         return 0;
772 }
773
774 int
775 uev_remove_path (char * devname, struct vectors * vecs)
776 {
777         int i;
778         struct path * pp;
779
780         pp = find_path_by_dev(vecs->pathvec, devname);
781
782         if (!pp) {
783                 condlog(3, "%s: not in pathvec");
784                 return 1;
785         }
786
787         if (pp->mpp && pp->state == PATH_UP)
788                 update_queue_mode_del_path(pp->mpp);
789
790         condlog(2, "remove %s path checker", devname);
791         i = find_slot(vecs->pathvec, (void *)pp);
792         vector_del_slot(vecs->pathvec, i);
793         free_path(pp);
794
795         return 0;
796 }
797
798 int
799 show_paths (char ** r, int * len, struct vectors * vecs)
800 {
801         int i;
802         struct path * pp;
803         char * c;
804         char * reply;
805         struct path_layout pl;
806
807         get_path_layout(&pl, vecs->pathvec);
808         reply = MALLOC(MAX_REPLY_LEN);
809
810         if (!reply)
811                 return 1;
812
813         c = reply;
814
815         if (VECTOR_SIZE(vecs->pathvec) > 0)
816                 c += snprint_path_header(c, reply + MAX_REPLY_LEN - c,
817                                          PRINT_PATH_CHECKER, &pl);
818
819         vector_foreach_slot(vecs->pathvec, pp, i)
820                 c += snprint_path(c, reply + MAX_REPLY_LEN - c,
821                                   PRINT_PATH_CHECKER, pp, &pl);
822
823         *r = reply;
824         *len = (int)(c - reply + 1);
825         return 0;
826 }
827
828 int
829 show_maps (char ** r, int *len, struct vectors * vecs)
830 {
831         int i;
832         struct multipath * mpp;
833         char * c;
834         char * reply;
835         struct map_layout ml;
836
837         get_map_layout(&ml, vecs->mpvec);
838         reply = MALLOC(MAX_REPLY_LEN);
839
840         if (!reply)
841                 return 1;
842
843         c = reply;
844         if (VECTOR_SIZE(vecs->mpvec) > 0)
845                 c += snprint_map_header(c, reply + MAX_REPLY_LEN - c,
846                                         PRINT_MAP_FAILBACK, &ml);
847
848         vector_foreach_slot(vecs->mpvec, mpp, i)
849                 c += snprint_map(c, reply + MAX_REPLY_LEN - c,
850                                  PRINT_MAP_FAILBACK, mpp, &ml);
851
852         *r = reply;
853         *len = (int)(c - reply + 1);
854         return 0;
855 }
856
857 int
858 dump_pathvec (char ** r, int * len, struct vectors * vecs)
859 {
860         int i;
861         struct path * pp;
862         char * reply;
863         char * p;
864
865         *len = VECTOR_SIZE(vecs->pathvec) * sizeof(struct path);
866         reply = (char *)MALLOC(*len);
867         *r = reply;
868
869         if (!reply)
870                 return 1;
871
872         p = reply;
873
874         vector_foreach_slot (vecs->pathvec, pp, i) {
875                 memcpy((void *)p, pp, sizeof(struct path));
876                 p += sizeof(struct path);
877         }
878
879         /* return negative to hint caller not to add "ok" to the dump */
880         return -1;
881 }
882
883 static int
884 map_discovery (struct vectors * vecs)
885 {
886         int i;
887         struct multipath * mpp;
888
889         if (dm_get_maps(vecs->mpvec, "multipath"))
890                 return 1;
891
892         vector_foreach_slot (vecs->mpvec, mpp, i) {
893                 if (setup_multipath(vecs, mpp))
894                         return 1;
895                 start_waiter_thread(mpp, vecs);
896         }
897
898         return 0;
899 }
900
901 int
902 reconfigure (struct vectors * vecs)
903 {
904         struct config * old = conf;
905         struct multipath * mpp;
906         struct path * pp;
907         int i;
908
909         conf = NULL;
910
911         if (load_config(DEFAULT_CONFIGFILE)) {
912                 conf = old;
913                 condlog(2, "reconfigure failed, continue with old config");
914                 return 1;
915         }
916         conf->verbosity = old->verbosity;
917         free_config(old);
918
919         vector_foreach_slot (vecs->mpvec, mpp, i) {
920                 mpp->mpe = find_mpe(mpp->wwid);
921                 mpp->hwe = extract_hwe_from_path(mpp);
922                 adopt_paths(vecs, mpp);
923                 set_no_path_retry(mpp);
924         }
925         vector_foreach_slot (vecs->pathvec, pp, i) {
926                 select_checkfn(pp);
927                 select_getuid(pp);
928                 select_getprio(pp);
929         }
930         condlog(2, "reconfigured");
931         return 0;
932 }
933
934 int
935 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
936 {
937         struct vectors * vecs;
938         int r;
939         
940         *reply = NULL;
941         *len = 0;
942         vecs = (struct vectors *)trigger_data;
943
944         pthread_cleanup_push(cleanup_lock, vecs->lock);
945         lock(vecs->lock);
946
947         r = parse_cmd(str, reply, len, vecs);
948
949         if (r > 0) {
950                 *reply = STRDUP("fail\n");
951                 *len = strlen(*reply) + 1;
952                 r = 1;
953         }
954         else if (!r && *len == 0) {
955                 *reply = STRDUP("ok\n");
956                 *len = strlen(*reply) + 1;
957                 r = 0;
958         }
959         /* else if (r < 0) leave *reply alone */
960
961         lock_cleanup_pop(vecs->lock);
962         return r;
963 }
964
/*
 * Tell whether a uevent should be ignored.
 *
 * devpath: sysfs device path carried by the uevent
 *
 * Returns 1 (discard) for anything that is not under /block/ or that
 * names a partition, i.e. has a second component below the block
 * device; returns 0 for whole block devices.
 */
static int
uev_discard(char * devpath)
{
        /*
         * The conversions below store up to 10 characters plus the
         * terminating NUL, so each buffer needs 11 bytes.
         */
        char a[11], b[11];

        /*
         * keep only block devices, discard partitions
         */
        if (sscanf(devpath, "/block/%10s", a) != 1 ||
            sscanf(devpath, "/block/%10[^/]/%10s", a, b) == 2) {
                condlog(4, "discard event on %s", devpath);
                return 1;
        }
        return 0;
}
980
981 int 
982 uev_trigger (struct uevent * uev, void * trigger_data)
983 {
984         int r = 0;
985         char devname[32];
986         struct vectors * vecs;
987
988         vecs = (struct vectors *)trigger_data;
989
990         if (uev_discard(uev->devpath))
991                 goto out;
992
993         basename(uev->devpath, devname);
994         lock(vecs->lock);
995
996         /*
997          * device map add/remove event
998          */
999         if (!strncmp(devname, "dm-", 3)) {
1000                 if (!strncmp(uev->action, "add", 3)) {
1001                         r = uev_add_map(devname, vecs);
1002                         goto out;
1003                 }
1004 #if 0
1005                 if (!strncmp(uev->action, "remove", 6)) {
1006                         r = uev_remove_map(devname, vecs);
1007                         goto out;
1008                 }
1009 #endif
1010                 goto out;
1011         }
1012         
1013         /*
1014          * path add/remove event
1015          */
1016         if (blacklist(conf->blist, devname))
1017                 goto out;
1018
1019         if (!strncmp(uev->action, "add", 3)) {
1020                 r = uev_add_path(devname, vecs);
1021                 goto out;
1022         }
1023         if (!strncmp(uev->action, "remove", 6)) {
1024                 r = uev_remove_path(devname, vecs);
1025                 goto out;
1026         }
1027
1028 out:
1029         unlock(vecs->lock);
1030         return r;
1031 }
1032
1033 static void *
1034 ueventloop (void * ap)
1035 {
1036         if (uevent_listen(&uev_trigger, ap))
1037                 fprintf(stderr, "error starting uevent listener");
1038                 
1039         return NULL;
1040 }
1041
1042 static void *
1043 uxlsnrloop (void * ap)
1044 {
1045         if (load_keys())
1046                 return NULL;
1047         
1048         if (alloc_handlers())
1049                 return NULL;
1050
1051         add_handler(LIST+PATHS, cli_list_paths);
1052         add_handler(LIST+MAPS, cli_list_maps);
1053         add_handler(ADD+PATH, cli_add_path);
1054         add_handler(DEL+PATH, cli_del_path);
1055         add_handler(ADD+MAP, cli_add_map);
1056         add_handler(DEL+MAP, cli_del_map);
1057         add_handler(SWITCH+MAP+GROUP, cli_switch_group);
1058         add_handler(DUMP+PATHVEC, cli_dump_pathvec);
1059         add_handler(RECONFIGURE, cli_reconfigure);
1060         add_handler(SUSPEND+MAP, cli_suspend);
1061         add_handler(RESUME+MAP, cli_resume);
1062         add_handler(REINSTATE+PATH, cli_reinstate);
1063         add_handler(FAIL+PATH, cli_fail);
1064
1065         uxsock_listen(&uxsock_trigger, ap);
1066
1067         return NULL;
1068 }
1069
1070 static int
1071 exit_daemon (int status)
1072 {
1073         if (status != 0)
1074                 fprintf(stderr, "bad exit status. see daemon.log\n");
1075
1076         condlog(3, "unlink pidfile");
1077         unlink(DEFAULT_PIDFILE);
1078
1079         lock(&exit_mutex);
1080         pthread_cond_signal(&exit_cond);
1081         unlock(&exit_mutex);
1082
1083         return status;
1084 }
1085
1086 static void
1087 fail_path (struct path * pp)
1088 {
1089         if (!pp->mpp)
1090                 return;
1091
1092         condlog(2, "checker failed path %s in map %s",
1093                  pp->dev_t, pp->mpp->alias);
1094
1095         dm_fail_path(pp->mpp->alias, pp->dev_t);
1096         update_queue_mode_del_path(pp->mpp);
1097 }
1098
1099 /*
1100  * caller must have locked the path list before calling that function
1101  */
1102 static void
1103 reinstate_path (struct path * pp)
1104 {
1105         if (!pp->mpp)
1106                 return;
1107
1108         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
1109                 condlog(0, "%s: reinstate failed", pp->dev_t);
1110         else {
1111                 condlog(2, "%s: reinstated", pp->dev_t);
1112                 update_queue_mode_add_path(pp->mpp);
1113         }
1114 }
1115
1116 static void
1117 enable_group(struct path * pp)
1118 {
1119         struct pathgroup * pgp;
1120
1121         /*
1122          * if path is added through uev_add_path, pgindex can be unset.
1123          * next update_strings() will set it, upon map reload event.
1124          *
1125          * we can safely return here, because upon map reload, all
1126          * PG will be enabled.
1127          */
1128         if (!pp->pgindex)
1129                 return;
1130
1131         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1132         
1133         if (pgp->status == PGSTATE_DISABLED) {
1134                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1135                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1136         }
1137 }
1138
1139 static void
1140 mpvec_garbage_collector (struct vectors * vecs)
1141 {
1142         struct multipath * mpp;
1143         int i;
1144
1145         vector_foreach_slot (vecs->mpvec, mpp, i) {
1146                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1147                         condlog(2, "%s: remove dead map", mpp->alias);
1148                         remove_map(mpp, vecs);
1149                         i--;
1150                 }
1151         }
1152 }
1153
1154 static void
1155 defered_failback_tick (vector mpvec)
1156 {
1157         struct multipath * mpp;
1158         int i;
1159
1160         vector_foreach_slot (mpvec, mpp, i) {
1161                 /*
1162                  * defered failback getting sooner
1163                  */
1164                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1165                         mpp->failback_tick--;
1166
1167                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1168                                 switch_pathgroup(mpp);
1169                 }
1170         }
1171 }
1172
1173 static void
1174 retry_count_tick(vector mpvec)
1175 {
1176         struct multipath *mpp;
1177         int i;
1178
1179         vector_foreach_slot (mpvec, mpp, i) {
1180                 if (mpp->retry_tick) {
1181                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1182                         if(--mpp->retry_tick == 0) {
1183                                 dm_queue_if_no_path(mpp->alias, 0);
1184                                 condlog(2, "%s: Disable queueing", mpp->alias);
1185                         }
1186                 }
1187         }
1188 }
1189
/*
 * Path checker thread body.
 *
 * ap is the struct vectors holding the path and multipath vectors
 * together with the lock protecting them.
 *
 * Once per second, with vecs->lock held, the loop:
 *   - runs the checker function of every path that belongs to a map
 *     and whose tick countdown has expired,
 *   - fails or reinstates the path in the device-mapper when its state
 *     changed, and schedules or performs a path group failback,
 *   - refreshes the path priority,
 *   - advances the deferred failback and retry countdowns of all maps,
 *   - every MAPGCINT iterations, garbage-collects dead maps.
 *
 * The loop never terminates by itself; the trailing return is only
 * there to satisfy the thread function signature.
 */
static void *
checkerloop (void *ap)
{
        struct vectors *vecs;
        struct path *pp;
        int i, count = 0;       /* count: iterations left before next map GC */
        int newstate;
        char checker_msg[MAX_CHECKER_MSG_SIZE];

        mlockall(MCL_CURRENT | MCL_FUTURE);

        memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
        vecs = (struct vectors *)ap;

        condlog(2, "path checkers start up");

        /*
         * init the path check interval
         *
         * NOTE(review): this walk of the path vector happens before
         * vecs->lock is taken for the first time.
         */
        vector_foreach_slot (vecs->pathvec, pp, i) {
                pp->checkint = conf->checkint;
        }

        while (1) {
                /*
                 * the cleanup handler is pushed before taking the lock;
                 * it is popped and executed by lock_cleanup_pop() at
                 * the bottom of the loop
                 */
                pthread_cleanup_push(cleanup_lock, vecs->lock);
                lock(vecs->lock);
                condlog(4, "tick");

                vector_foreach_slot (vecs->pathvec, pp, i) {
                        /* paths outside of any map are not checked */
                        if (!pp->mpp)
                                continue;

                        if (pp->tick && --pp->tick)
                                continue; /* don't check this path yet */

                        /*
                         * provision a next check soonest,
                         * in case we exit abnormally from here
                         */
                        pp->tick = conf->checkint;

                        /* no checker selected yet: try to pick one now */
                        if (!pp->checkfn) {
                                pathinfo(pp, conf->hwtable, DI_SYSFS);
                                select_checkfn(pp);
                        }

                        if (!pp->checkfn) {
                                condlog(0, "%s: checkfn is void", pp->dev);
                                continue;
                        }
                        newstate = pp->checkfn(pp->fd, checker_msg,
                                               &pp->checker_context);

                        if (newstate != pp->state) {
                                pp->state = newstate;
                                /* LOG_MSG reads pp and clears checker_msg */
                                LOG_MSG(1, checker_msg);

                                /*
                                 * upon state change, reset the checkint
                                 * to the shortest delay
                                 */
                                pp->checkint = conf->checkint;

                                if (newstate == PATH_DOWN ||
                                    newstate == PATH_SHAKY) {
                                        /*
                                         * proactively fail path in the DM
                                         */
                                        fail_path(pp);

                                        /*
                                         * cancel scheduled failback
                                         */
                                        pp->mpp->failback_tick = 0;

                                        /* skips the prio refresh below */
                                        continue;
                                }

                                /*
                                 * reinstate this path
                                 */
                                reinstate_path(pp);

                                /*
                                 * need to switch group ?
                                 */
                                update_multipath_strings(pp->mpp,
                                                         vecs->pathvec);

                                /*
                                 * schedule deferred failback
                                 */
                                if (pp->mpp->pgfailback > 0)
                                        pp->mpp->failback_tick =
                                                pp->mpp->pgfailback + 1;
                                else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
                                    need_switch_pathgroup(pp->mpp, 1))
                                        switch_pathgroup(pp->mpp);

                                /*
                                 * if at least one path is up in a group, and
                                 * the group is disabled, re-enable it
                                 */
                                if (newstate == PATH_UP)
                                        enable_group(pp);
                        }
                        else if (newstate == PATH_UP || newstate == PATH_GHOST) {
                                LOG_MSG(4, checker_msg);
                                /*
                                 * double the next check delay.
                                 * max at conf->max_checkint
                                 */
                                if (pp->checkint < (conf->max_checkint / 2))
                                        pp->checkint = 2 * pp->checkint;
                                else
                                        pp->checkint = conf->max_checkint;

                                pp->tick = pp->checkint;
                                condlog(4, "%s: delay next check %is",
                                                pp->dev_t, pp->tick);

                        }
                        /*
                         * already done in the state-change branch above;
                         * a no-op when the state did not change
                         */
                        pp->state = newstate;

                        /*
                         * path prio refreshing
                         */
                        condlog(4, "path prio refresh");
                        pathinfo(pp, conf->hwtable, DI_PRIO);

                        /*
                         * the refreshed priorities may call for another
                         * path group: schedule or perform the failback
                         */
                        if (need_switch_pathgroup(pp->mpp, 0)) {
                                if (pp->mpp->pgfailback > 0)
                                        pp->mpp->failback_tick =
                                                pp->mpp->pgfailback + 1;
                                else if (pp->mpp->pgfailback ==
                                                -FAILBACK_IMMEDIATE)
                                        switch_pathgroup(pp->mpp);
                        }
                }
                /* per-map countdowns, one tick per loop iteration */
                defered_failback_tick(vecs->mpvec);
                retry_count_tick(vecs->mpvec);

                /* garbage-collect maps once every MAPGCINT iterations */
                if (count)
                        count--;
                else {
                        condlog(4, "map garbage collection");
                        mpvec_garbage_collector(vecs);
                        count = MAPGCINT;
                }

                /*
                 * pops and runs the cleanup handler; cleanup_lock
                 * presumably releases vecs->lock -- TODO confirm
                 */
                lock_cleanup_pop(vecs->lock);
                sleep(1);
        }
        return NULL;
}
1345
1346 static struct vectors *
1347 init_paths (void)
1348 {
1349         struct vectors * vecs;
1350
1351         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1352
1353         if (!vecs)
1354                 return NULL;
1355
1356         vecs->lock = 
1357                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1358
1359         if (!vecs->lock)
1360                 goto out;
1361
1362         vecs->pathvec = vector_alloc();
1363
1364         if (!vecs->pathvec)
1365                 goto out1;
1366                 
1367         vecs->mpvec = vector_alloc();
1368
1369         if (!vecs->mpvec)
1370                 goto out2;
1371         
1372         pthread_mutex_init(vecs->lock, NULL);
1373
1374         return vecs;
1375
1376 out2:
1377         vector_free(vecs->pathvec);
1378 out1:
1379         FREE(vecs->lock);
1380 out:
1381         FREE(vecs);
1382         condlog(0, "failed to init paths");
1383         return NULL;
1384 }
1385
/*
 * Install func as the handler of signal signo, with an empty signal
 * mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when
 * sigaction() fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
        struct sigaction new_act;
        struct sigaction old_act;

        new_act.sa_handler = func;
        new_act.sa_flags = 0;
        sigemptyset(&new_act.sa_mask);

        if (sigaction(signo, &new_act, &old_act) < 0)
                return (SIG_ERR);

        return (old_act.sa_handler);
}
1404
/*
 * SIGHUP handler: log the reception of the signal. Builds with
 * _DEBUG_ defined additionally call dbg_free_final().
 */
static void
sighup (int sig)
{
        (void)sig;      /* signal number is not used */

        condlog(3, "SIGHUP received");
#ifdef _DEBUG_
        dbg_free_final(NULL);
#endif
}
1414
/*
 * Termination signal handler: request a daemon shutdown with a
 * zero exit status.
 */
static void
sigend (int sig)
{
        (void)sig;      /* signal number is not used */

        (void)exit_daemon(0);
}
1420
1421 static void
1422 signal_init(void)
1423 {
1424         signal_set(SIGHUP, sighup);
1425         signal_set(SIGINT, sigend);
1426         signal_set(SIGTERM, sigend);
1427         signal_set(SIGKILL, sigend);
1428 }
1429
/*
 * Switch the calling process to the SCHED_RR realtime scheduling
 * policy at priority 99.
 *
 * Failure is not fatal: it is only logged.
 */
static void
setscheduler (void)
{
        /* C99 designated initializer, not the obsolete GNU "field:" form */
        const struct sched_param sched_param = {
                .sched_priority = 99
        };

        if (sched_setscheduler(0, SCHED_RR, &sched_param) == -1)
                condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1444
/*
 * Write val to /proc/self/oom_adj.
 *
 * Best effort: nothing is done or reported when the file cannot be
 * opened.
 */
static void
set_oom_adj (int val)
{
        FILE *oom = fopen("/proc/self/oom_adj", "w");

        if (oom != NULL) {
                fprintf(oom, "%i", val);
                fclose(oom);
        }
}
1458         
1459 static int
1460 child (void * param)
1461 {
1462         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1463         pthread_attr_t attr;
1464         struct vectors * vecs;
1465
1466         mlockall(MCL_CURRENT | MCL_FUTURE);
1467
1468         if (logsink)
1469                 log_thread_start();
1470
1471         condlog(2, "--------start up--------");
1472         condlog(2, "read " DEFAULT_CONFIGFILE);
1473
1474         if (load_config(DEFAULT_CONFIGFILE))
1475                 exit(1);
1476
1477         setlogmask(LOG_UPTO(conf->verbosity + 3));
1478
1479         /*
1480          * fill the voids left in the config file
1481          */
1482         if (!conf->checkint) {
1483                 conf->checkint = CHECKINT;
1484                 conf->max_checkint = MAX_CHECKINT;
1485         }
1486
1487         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1488                 if (logsink)
1489                         log_thread_stop();
1490
1491                 exit(1);
1492         }
1493         signal_init();
1494         setscheduler();
1495         set_oom_adj(-17);
1496         vecs = init_paths();
1497
1498         if (!vecs)
1499                 exit(1);
1500
1501         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1502                 condlog(0, "can not find sysfs mount point");
1503                 exit(1);
1504         }
1505
1506         /*
1507          * fetch paths and multipaths lists
1508          * no paths and/or no multipaths are valid scenarii
1509          * vectors maintenance will be driven by events
1510          */
1511         path_discovery(vecs->pathvec, conf, DI_SYSFS | DI_WWID | DI_CHECKER);
1512         map_discovery(vecs);
1513
1514         /*
1515          * start threads
1516          */
1517         pthread_attr_init(&attr);
1518         pthread_attr_setstacksize(&attr, 64 * 1024);
1519         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
1520         
1521         pthread_create(&check_thr, &attr, checkerloop, vecs);
1522         pthread_create(&uevent_thr, &attr, ueventloop, vecs);
1523         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, vecs);
1524
1525         pthread_cond_wait(&exit_cond, &exit_mutex);
1526
1527         /*
1528          * exit path
1529          */
1530         lock(vecs->lock);
1531         remove_maps(vecs);
1532         free_pathvec(vecs->pathvec, FREE_PATHS);
1533
1534         pthread_cancel(check_thr);
1535         pthread_cancel(uevent_thr);
1536         pthread_cancel(uxlsnr_thr);
1537
1538         free_keys(keys);
1539         keys = NULL;
1540         free_handlers(handlers);
1541         handlers = NULL;
1542         free_polls();
1543
1544         unlock(vecs->lock);
1545         pthread_mutex_destroy(vecs->lock);
1546         FREE(vecs->lock);
1547         vecs->lock = NULL;
1548         FREE(vecs);
1549         vecs = NULL;
1550         free_config(conf);
1551         conf = NULL;
1552
1553         condlog(2, "--------shut down-------");
1554         
1555         if (logsink)
1556                 log_thread_stop();
1557
1558 #ifdef _DEBUG_
1559         dbg_free_final(NULL);
1560 #endif
1561
1562         exit(0);
1563 }
1564
/*
 * Detach from the launching context with a double fork.
 *
 * In the surviving (grand)child, stdin is redirected to /dev/null and
 * stdout/stderr to /dev/console, the working directory is set to "/"
 * and the umask is cleared.
 *
 * Returns:
 *   -1   the first fork failed
 *   > 0  in the original process: pid of the first child
 *   0    in the daemonized process
 *
 * The intermediate process, and the daemonized process when it cannot
 * open its standard streams, leave through _exit(0). A failed second
 * fork is only reported: the session leader then carries on as the
 * daemon.
 */
static int
daemonize(void)
{
        int pid;
        int in_fd, out_fd;

        if( (pid = fork()) < 0){
                fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
                return -1;
        }
        else if (pid != 0)
                return pid;

        setsid();

        if ( (pid = fork()) < 0)
                fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
        else if (pid != 0)
                _exit(0);

        in_fd = open("/dev/null", O_RDONLY);
        if (in_fd < 0){
                fprintf(stderr, "cannot open /dev/null for input : %s\n",
                        strerror(errno));
                _exit(0);
        }
        out_fd = open("/dev/console", O_WRONLY);
        if (out_fd < 0){
                fprintf(stderr, "cannot open /dev/console for output : %s\n",
                        strerror(errno));
                _exit(0);
        }

        /*
         * dup2() replaces each standard descriptor in one step and is
         * a no-op when source and target are already the same fd,
         * which happens when we were started with a standard
         * descriptor closed
         */
        dup2(in_fd, STDIN_FILENO);
        dup2(out_fd, STDOUT_FILENO);
        dup2(out_fd, STDERR_FILENO);

        /*
         * only drop the temporary descriptors when they are not
         * themselves one of the standard streams
         */
        if (in_fd > STDERR_FILENO)
                close(in_fd);
        if (out_fd > STDERR_FILENO)
                close(out_fd);

        chdir("/");
        umask(0);
        return 0;
}
1611
1612 int
1613 main (int argc, char *argv[])
1614 {
1615         extern char *optarg;
1616         extern int optind;
1617         int arg;
1618         int err;
1619         
1620         logsink = 1;
1621
1622         if (getuid() != 0) {
1623                 fprintf(stderr, "need to be root\n");
1624                 exit(1);
1625         }
1626
1627         /* make sure we don't lock any path */
1628         chdir("/");
1629         umask(umask(077) | 022);
1630
1631         conf = alloc_config();
1632
1633         if (!conf)
1634                 exit(1);
1635
1636         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1637         switch(arg) {
1638                 case 'd':
1639                         logsink = 0;
1640                         //debug=1; /* ### comment me out ### */
1641                         break;
1642                 case 'v':
1643                         if (sizeof(optarg) > sizeof(char *) ||
1644                             !isdigit(optarg[0]))
1645                                 exit(1);
1646
1647                         conf->verbosity = atoi(optarg);
1648                         break;
1649                 case 'k':
1650                         uxclnt(optarg);
1651                         exit(0);
1652                 default:
1653                         ;
1654                 }
1655         }
1656
1657         if (!logsink)
1658                 err = 0;
1659         else
1660                 err = daemonize();
1661         
1662         if (err < 0)
1663                 /* error */
1664                 exit(1);
1665         else if (err > 0)
1666                 /* parent dies */
1667                 exit(0);
1668         else
1669                 /* child lives */
1670                 return (child(NULL));
1671 }