[multipathd] fix offline devices reported as active
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 #include <unistd.h>
2 #include <sys/stat.h>
3 #include <libdevmapper.h>
4 #include <wait.h>
5 #include <sys/mman.h>
6 #include <sys/types.h>
7 #include <fcntl.h>
8 #include <errno.h>
9
10 /*
11  * libsysfs
12  */
13 #include <sysfs/libsysfs.h>
14 #include <sysfs/dlist.h>
15
16 /*
17  * libcheckers
18  */
19 #include <checkers.h>
20 #include <path_state.h>
21
22 /*
23  * libmultipath
24  */
25 #include <parser.h>
26 #include <vector.h>
27 #include <memory.h>
28 #include <config.h>
29 #include <callout.h>
30 #include <util.h>
31 #include <blacklist.h>
32 #include <hwtable.h>
33 #include <defaults.h>
34 #include <structs.h>
35 #include <dmparser.h>
36 #include <devmapper.h>
37 #include <dict.h>
38 #include <discovery.h>
39 #include <debug.h>
40 #include <propsel.h>
41 #include <uevent.h>
42 #include <switchgroup.h>
43 #include <path_state.h>
44 #include <print.h>
45
46 #include "main.h"
47 #include "pidfile.h"
48 #include "uxlsnr.h"
49 #include "uxclnt.h"
50 #include "cli.h"
51 #include "cli_handlers.h"
52
53 #define FILE_NAME_SIZE 256
54 #define CMDSIZE 160
55
/*
 * Log the checker message "b" at verbosity "a", prefixed with pp->dev_t,
 * then clear the first MAX_CHECKER_MSG_SIZE bytes of "b". Nothing happens
 * when "b" is an empty string.
 *
 * NOTE: the expansion references a variable named "pp" that must be in
 * scope at the call site.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define LOG_MSG(a,b) \
	do { \
		if (strlen(b)) { \
			condlog(a, "%s: %s", pp->dev_t, b); \
			memset(b, 0, MAX_CHECKER_MSG_SIZE); \
		} \
	} while (0)
61
/*
 * Mutex helpers.
 *
 * With LCKDBG defined, every lock/unlock is traced on stderr together with
 * the file, function and line of the call site.
 *
 * lock() and unlock() expand to a single statement in both variants, so
 * they can be used safely in an unbraced if/else.
 *
 * lock_cleanup_pop() pops the most recently pushed cleanup handler and runs
 * it. It has to be paired with a pthread_cleanup_push() in the same lexical
 * scope, which is why it cannot be wrapped in do { } while (0).
 */
#ifdef LCKDBG
#define lock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
		pthread_mutex_lock(a); \
	} while (0)
#define unlock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
		pthread_mutex_unlock(a); \
	} while (0)
#define lock_cleanup_pop(a) \
	fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
	pthread_cleanup_pop(1);
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#define lock_cleanup_pop(a) pthread_cleanup_pop(1);
#endif
77
/*
 * Shutdown notification pair: exit_daemon() signals exit_cond while holding
 * exit_mutex. The waiting side is not visible in this part of the file.
 */
78 pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
79 pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
80
81 /*
82  * structs
83  */
/*
 * Context of one device-mapper event waiter thread. It is allocated by
 * alloc_waiter(), filled in by start_waiter_thread(), passed to waitevent()
 * as the thread argument and released by free_waiter().
 */
84 struct event_thread {
85         struct dm_task *dmt;	/* task created in waiteventloop(); destroyed by free_waiter() when non-NULL */
86         pthread_t thread;	/* thread id written by pthread_create() */
87         int event_nr;	/* event number handed to dm_task_set_event_nr() */
88         char mapname[WWID_SIZE];	/* copy of the map alias */
89         struct vectors *vecs;	/* vectors whose lock is taken around update_multipath() */
90 };
91
92 static struct event_thread *
93 alloc_waiter (void)
94 {
95
96         struct event_thread * wp;
97
98         wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
99
100         return wp;
101 }
102
103 static void
104 free_waiter (void * data)
105 {
106         struct event_thread * wp = (struct event_thread *)data;
107
108         if (wp->dmt)
109                 dm_task_destroy(wp->dmt);
110         FREE(wp);
111 }
112
113 static void
114 stop_waiter_thread (struct multipath * mpp, struct vectors * vecs)
115 {
116         struct event_thread * wp = (struct event_thread *)mpp->waiter;
117         pthread_t thread;
118         
119         if (!wp) {
120                 condlog(3, "%s: no waiter thread", mpp->alias);
121                 return;
122         }
123         thread = wp->thread;
124
125         if (!wp) {
126                 condlog(3, "%s: thread not started", mpp->alias);
127                 return;
128         }
129         condlog(2, "%s: stop event checker thread", wp->mapname);
130         pthread_kill(thread, SIGHUP);
131 }
132
/*
 * pthread cleanup handler: unlock the mutex passed as opaque data.
 */
static void
cleanup_lock (void * data)
{
	pthread_mutex_t * mutex = data;

	pthread_mutex_unlock(mutex);
}
138
139 static void
140 adopt_paths (struct vectors * vecs, struct multipath * mpp)
141 {
142         int i;
143         struct path * pp;
144
145         if (!mpp)
146                 return;
147
148         vector_foreach_slot (vecs->pathvec, pp, i) {
149                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
150                         condlog(4, "%s ownership set", pp->dev_t);
151                         pp->mpp = mpp;
152                 }
153         }
154 }
155
156 static void
157 orphan_path (struct path * pp)
158 {
159         pp->mpp = NULL;
160         pp->checkfn = NULL;
161         pp->dmstate = PSTATE_UNDEF;
162         pp->checker_context = NULL;
163         pp->getuid = NULL;
164         pp->getprio = NULL;
165
166         if (pp->fd >= 0)
167                 close(pp->fd);
168
169         pp->fd = -1;
170 }
171
172 static void
173 orphan_paths (struct vectors * vecs, struct multipath * mpp)
174 {
175         int i;
176         struct path * pp;
177
178         vector_foreach_slot (vecs->pathvec, pp, i) {
179                 if (pp->mpp == mpp) {
180                         condlog(4, "%s is orphaned", pp->dev_t);
181                         orphan_path(pp);
182                 }
183         }
184 }
185
186 static int
187 update_multipath_table (struct multipath *mpp, vector pathvec)
188 {
189         if (!mpp)
190                 return 1;
191
192         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
193                 return 1;
194
195         if (disassemble_map(pathvec, mpp->params, mpp))
196                 return 1;
197
198         return 0;
199 }
200
201 static int
202 update_multipath_status (struct multipath *mpp)
203 {
204         if (!mpp)
205                 return 1;
206
207         if(dm_get_status(mpp->alias, mpp->status))
208                 return 1;
209
210         if (disassemble_status(mpp->status, mpp))
211                 return 1;
212
213         return 0;
214 }
215
216 static int
217 update_multipath_strings (struct multipath *mpp, vector pathvec)
218 {
219         if (mpp->selector) {
220                 FREE(mpp->selector);
221                 mpp->selector = NULL;
222         }
223
224         if (mpp->features) {
225                 FREE(mpp->features);
226                 mpp->features = NULL;
227         }
228
229         if (mpp->hwhandler) {
230                 FREE(mpp->hwhandler);
231                 mpp->hwhandler = NULL;
232         }
233
234         free_pgvec(mpp->pg, KEEP_PATHS);
235         mpp->pg = NULL;
236
237         if (update_multipath_table(mpp, pathvec))
238                 return 1;
239
240         if (update_multipath_status(mpp))
241                 return 1;
242
243         return 0;
244 }
245
246 static void
247 set_multipath_wwid (struct multipath * mpp)
248 {
249         if (mpp->wwid)
250                 return;
251
252         dm_get_uuid(mpp->alias, mpp->wwid);
253 }
254
255 static int
256 pathcount (struct multipath *mpp, int state)
257 {
258         struct pathgroup *pgp;
259         struct path *pp;
260         int i, j;
261         int count = 0;
262
263         vector_foreach_slot (mpp->pg, pgp, i)
264                 vector_foreach_slot (pgp->paths, pp, j)
265                         if (pp->state == state)
266                                 count++;
267         return count;
268 }
269
270 /*
271  * mpp->no_path_retry:
272  *   -2 (QUEUE) : queue_if_no_path enabled, never turned off
273  *   -1 (FAIL)  : fail_if_no_path
274  *    0 (UNDEF) : nothing
275  *   >0         : queue_if_no_path enabled, turned off after polling n times
276  */
277 static void
278 update_queue_mode_del_path(struct multipath *mpp)
279 {
280         if (--mpp->nr_active == 0 && mpp->no_path_retry > 0) {
281                 /*
282                  * Enter retry mode.
283                  * meaning of +1: retry_tick may be decremented in
284                  *                checkerloop before starting retry.
285                  */
286                 mpp->retry_tick = mpp->no_path_retry * conf->checkint + 1;
287                 condlog(1, "%s: Entering recovery mode: max_retries=%d",
288                         mpp->alias, mpp->no_path_retry);
289         }
290         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
291 }
292
293 static void
294 update_queue_mode_add_path(struct multipath *mpp)
295 {
296         if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
297                 /* come back to normal mode from retry mode */
298                 mpp->retry_tick = 0;
299                 dm_queue_if_no_path(mpp->alias, 1);
300                 condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
301                 condlog(1, "%s: Recovered to normal mode", mpp->alias);
302         }
303         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
304 }
305
306 static void
307 set_no_path_retry(struct multipath *mpp)
308 {
309         mpp->retry_tick = 0;
310         mpp->nr_active = pathcount(mpp, PATH_UP);
311         select_no_path_retry(mpp);
312
313         switch (mpp->no_path_retry) {
314         case NO_PATH_RETRY_UNDEF:
315                 break;
316         case NO_PATH_RETRY_FAIL:
317                 dm_queue_if_no_path(mpp->alias, 0);
318                 break;
319         case NO_PATH_RETRY_QUEUE:
320                 dm_queue_if_no_path(mpp->alias, 1);
321                 break;
322         default:
323                 dm_queue_if_no_path(mpp->alias, 1);
324                 if (mpp->nr_active == 0) {
325                         /* Enter retry mode */
326                         mpp->retry_tick = mpp->no_path_retry * conf->checkint;
327                         condlog(1, "%s: Entering recovery mode: max_retries=%d",
328                                 mpp->alias, mpp->no_path_retry);
329                 }
330                 break;
331         }
332 }
333
334 static struct hwentry *
335 extract_hwe_from_path(struct multipath * mpp)
336 {
337         struct path * pp;
338         struct pathgroup * pgp;
339
340         pgp = VECTOR_SLOT(mpp->pg, 0);
341         pp = VECTOR_SLOT(pgp->paths, 0);
342
343         return pp->hwe;
344 }
345
346 static void
347 remove_map (struct multipath * mpp, struct vectors * vecs)
348 {
349         int i;
350
351         stop_waiter_thread(mpp, vecs);
352
353         /*
354          * clear references to this map
355          */
356         orphan_paths(vecs, mpp);
357
358         /*
359          * purge the multipath vector
360          */
361         i = find_slot(vecs->mpvec, (void *)mpp);
362         vector_del_slot(vecs->mpvec, i);
363
364         /*
365          * final free
366          */
367         free_multipath(mpp, KEEP_PATHS);
368         mpp = NULL;
369 }
370
371 static void
372 remove_maps (struct vectors * vecs)
373 {
374         int i;
375         struct multipath * mpp;
376
377         vector_foreach_slot (vecs->mpvec, mpp, i) {
378                 remove_map(mpp, vecs);
379                 i--;
380         }
381
382         vector_free(vecs->mpvec);
383         vecs->mpvec = NULL;
384 }
385
386 static int
387 setup_multipath (struct vectors * vecs, struct multipath * mpp)
388 {
389         if (dm_get_info(mpp->alias, &mpp->dmi))
390                 goto out;
391
392         set_multipath_wwid(mpp);
393         mpp->mpe = find_mpe(mpp->wwid);
394         condlog(4, "discovered map %s", mpp->alias);
395
396         if (update_multipath_strings(mpp, vecs->pathvec))
397                 goto out;
398
399         adopt_paths(vecs, mpp);
400         mpp->hwe = extract_hwe_from_path(mpp);
401         select_pgfailback(mpp);
402         set_no_path_retry(mpp);
403
404         return 0;
405 out:
406         condlog(0, "%s: failed to setup multipath", mpp->alias);
407         remove_map(mpp, vecs);
408         return 1;
409 }
410
411 static int
412 need_switch_pathgroup (struct multipath * mpp, int refresh)
413 {
414         struct pathgroup * pgp;
415         struct path * pp;
416         int i, j;
417
418         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
419                 return 0;
420
421         /*
422          * Refresh path priority values
423          */
424         if (refresh)
425                 vector_foreach_slot (mpp->pg, pgp, i)
426                         vector_foreach_slot (pgp->paths, pp, j)
427                                 pathinfo(pp, conf->hwtable, DI_PRIO);
428
429         mpp->bestpg = select_path_group(mpp);
430
431         if (mpp->bestpg != mpp->nextpg)
432                 return 1;
433
434         return 0;
435 }
436
437 static void
438 switch_pathgroup (struct multipath * mpp)
439 {
440         dm_switchgroup(mpp->alias, mpp->bestpg);
441         condlog(2, "%s: switch to path group #%i",
442                  mpp->alias, mpp->bestpg);
443 }
444
445 static int
446 update_multipath (struct vectors *vecs, char *mapname)
447 {
448         struct multipath *mpp;
449         struct pathgroup  *pgp;
450         struct path *pp;
451         int i, j;
452         int r = 1;
453
454         mpp = find_mp_by_alias(vecs->mpvec, mapname);
455
456         if (!mpp)
457                 goto out;
458
459         free_pgvec(mpp->pg, KEEP_PATHS);
460         mpp->pg = NULL;
461
462         if (setup_multipath(vecs, mpp))
463                 goto out; /* mpp freed in setup_multipath */
464
465         /*
466          * compare checkers states with DM states
467          */
468         vector_foreach_slot (mpp->pg, pgp, i) {
469                 vector_foreach_slot (pgp->paths, pp, j) {
470                         if (pp->dmstate != PSTATE_FAILED)
471                                 continue;
472
473                         if (pp->state != PATH_DOWN) {
474                                 condlog(2, "%s: mark as failed", pp->dev_t);
475                                 pp->state = PATH_DOWN;
476                                 update_queue_mode_del_path(mpp);
477
478                                 /*
479                                  * if opportune,
480                                  * schedule the next check earlier
481                                  */
482                                 if (pp->tick > conf->checkint)
483                                         pp->tick = conf->checkint;
484                         }
485                 }
486         }
487         r = 0;
488 out:
489         if (r)
490                 condlog(0, "failed to update multipath");
491
492         return r;
493 }
494
/*
 * Unblock SIGHUP for the calling thread.
 * Returns the signal mask that was in effect before the call, so that the
 * caller can restore it.
 */
static sigset_t unblock_sighup(void)
{
	sigset_t previous;
	sigset_t hup_only;

	sigemptyset(&hup_only);
	sigaddset(&hup_only, SIGHUP);

	pthread_sigmask(SIG_UNBLOCK, &hup_only, &previous);

	return previous;
}
504
505 /*
506  * returns the reschedule delay
507  * negative means *stop*
508  */
509 static int
510 waiteventloop (struct event_thread * waiter)
511 {
512         sigset_t set;
513         int event_nr;
514         int r;
515
516         if (!waiter->event_nr)
517                 waiter->event_nr = dm_geteventnr(waiter->mapname);
518
519         if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
520                 return 1;
521
522         if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
523                 dm_task_destroy(waiter->dmt);
524                 return 1;
525         }
526
527         if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
528                                                       waiter->event_nr)) {
529                 dm_task_destroy(waiter->dmt);
530                 return 1;
531         }
532
533         dm_task_no_open_count(waiter->dmt);
534         
535         /* accept wait interruption */
536         set = unblock_sighup();
537
538         /* interruption spits messages */
539         dm_shut_log();
540
541         /* wait */
542         r = dm_task_run(waiter->dmt);
543
544         /* wait is over : event or interrupt */
545         pthread_sigmask(SIG_SETMASK, &set, NULL);
546         //dm_restore_log();
547
548         if (!r) /* wait interrupted by signal */
549                 return -1;
550
551         dm_task_destroy(waiter->dmt);
552         waiter->dmt = NULL;
553         waiter->event_nr++;
554
555         /*
556          * upon event ...
557          */
558         while (1) {
559                 condlog(3, "%s: devmap event #%i",
560                                 waiter->mapname, waiter->event_nr);
561
562                 /*
563                  * event might be :
564                  *
565                  * 1) a table reload, which means our mpp structure is
566                  *    obsolete : refresh it through update_multipath()
567                  * 2) a path failed by DM : mark as such through
568                  *    update_multipath()
569                  * 3) map has gone away : stop the thread.
570                  * 4) a path reinstate : nothing to do
571                  * 5) a switch group : nothing to do
572                  */
573                 pthread_cleanup_push(cleanup_lock, waiter->vecs->lock);
574                 lock(waiter->vecs->lock);
575                 r = update_multipath(waiter->vecs, waiter->mapname);
576                 lock_cleanup_pop(waiter->vecs->lock);
577
578                 if (r)
579                         return -1; /* stop the thread */
580
581                 event_nr = dm_geteventnr(waiter->mapname);
582
583                 if (waiter->event_nr == event_nr)
584                         return 1; /* upon problem reschedule 1s later */
585
586                 waiter->event_nr = event_nr;
587         }
588         return -1; /* never reach there */
589 }
590
591 static void *
592 waitevent (void * et)
593 {
594         int r;
595         struct event_thread *waiter;
596
597         mlockall(MCL_CURRENT | MCL_FUTURE);
598
599         waiter = (struct event_thread *)et;
600         pthread_cleanup_push(free_waiter, et);
601
602         while (1) {
603                 r = waiteventloop(waiter);
604
605                 if (r < 0)
606                         break;
607
608                 sleep(r);
609         }
610
611         pthread_cleanup_pop(1);
612         return NULL;
613 }
614
615 static int
616 start_waiter_thread (struct multipath * mpp, struct vectors * vecs)
617 {
618         pthread_attr_t attr;
619         struct event_thread * wp;
620
621         if (!mpp)
622                 return 0;
623
624         if (pthread_attr_init(&attr))
625                 goto out;
626
627         pthread_attr_setstacksize(&attr, 32 * 1024);
628         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
629
630         wp = alloc_waiter();
631
632         if (!wp)
633                 goto out;
634
635         mpp->waiter = (void *)wp;
636         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
637         wp->vecs = vecs;
638
639         if (pthread_create(&wp->thread, &attr, waitevent, wp)) {
640                 condlog(0, "%s: cannot create event checker", wp->mapname);
641                 goto out1;
642         }
643         condlog(2, "%s: event checker started", wp->mapname);
644
645         return 0;
646 out1:
647         free_waiter(wp);
648         mpp->waiter = NULL;
649 out:
650         condlog(0, "failed to start waiter thread");
651         return 1;
652 }
653
654 int
655 uev_add_map (char * devname, struct vectors * vecs)
656 {
657         int major, minor;
658         char dev_t[BLK_DEV_SIZE];
659         char * alias;
660         struct multipath * mpp;
661
662         if (sscanf(devname, "dm-%d", &minor) == 1 &&
663             !sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE) &&
664             sscanf(dev_t, "%d:%d", &major, &minor) == 2)
665                 alias = dm_mapname(major, minor);
666         else
667                 alias = STRDUP(devname);
668                 
669         if (!alias)
670                 return 1;
671         
672         if (!dm_type(alias, DEFAULT_TARGET)) {
673                 condlog(4, "%s: not a multipath map", alias);
674                 FREE(alias);
675                 return 0;
676         }
677
678         mpp = find_mp_by_alias(vecs->mpvec, alias);
679
680         if (mpp) {
681                 /*
682                  * this should not happen,
683                  * we missed a remove map event (not sent ?)
684                  */
685                 condlog(2, "%s: already registered", alias);
686                 remove_map(mpp, vecs);
687         }
688
689         /*
690          * now we can allocate
691          */
692         mpp = alloc_multipath();
693
694         if (!mpp)
695                 return 1;
696
697         mpp->alias = alias;
698
699         if (setup_multipath(vecs, mpp))
700                 return 1; /* mpp freed in setup_multipath */
701
702         if (!vector_alloc_slot(vecs->mpvec))
703                 goto out;
704
705         vector_set_slot(vecs->mpvec, mpp);
706         adopt_paths(vecs, mpp);
707
708         if (start_waiter_thread(mpp, vecs))
709                 goto out;
710
711         return 0;
712 out:
713         condlog(2, "%s: add devmap failed", mpp->alias);
714         remove_map(mpp, vecs);
715         return 1;
716 }
717
718 int
719 uev_remove_map (char * devname, struct vectors * vecs)
720 {
721         int minor;
722         struct multipath * mpp;
723
724         if (sscanf(devname, "dm-%d", &minor) == 1)
725                 mpp = find_mp_by_minor(vecs->mpvec, minor);
726         else
727                 mpp = find_mp_by_alias(vecs->mpvec, devname);
728
729         if (!mpp) {
730                 condlog(3, "%s: devmap not registered, can't remove",
731                         devname);
732                 return 0;
733         }
734
735         condlog(2, "remove %s devmap", mpp->alias);
736         remove_map(mpp, vecs);
737
738         return 0;
739 }
740
741 int
742 uev_add_path (char * devname, struct vectors * vecs)
743 {
744         struct path * pp;
745
746         pp = find_path_by_dev(vecs->pathvec, devname);
747
748         if (pp) {
749                 condlog(3, "%s: already in pathvec");
750                 return 1;
751         }
752         pp = store_pathinfo(vecs->pathvec, conf->hwtable,
753                        devname, DI_SYSFS | DI_WWID);
754
755         if (!pp) {
756                 condlog(0, "%s: failed to store path info", devname);
757                 return 1;
758         }
759
760         condlog(2, "%s: path checker registered", devname);
761         pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
762
763         if (pp->mpp) {
764                 condlog(4, "%s: ownership set to %s",
765                                 pp->dev_t, pp->mpp->alias);
766         } else {
767                 condlog(4, "%s: orphaned", pp->dev_t);
768                 orphan_path(pp);
769         }
770
771         return 0;
772 }
773
774 int
775 uev_remove_path (char * devname, struct vectors * vecs)
776 {
777         int i;
778         struct path * pp;
779
780         pp = find_path_by_dev(vecs->pathvec, devname);
781
782         if (!pp) {
783                 condlog(3, "%s: not in pathvec");
784                 return 1;
785         }
786
787         if (pp->mpp && pp->state == PATH_UP)
788                 update_queue_mode_del_path(pp->mpp);
789
790         condlog(2, "remove %s path checker", devname);
791         i = find_slot(vecs->pathvec, (void *)pp);
792         vector_del_slot(vecs->pathvec, i);
793         free_path(pp);
794
795         return 0;
796 }
797
798 int
799 show_paths (char ** r, int * len, struct vectors * vecs)
800 {
801         int i;
802         struct path * pp;
803         char * c;
804         char * reply;
805         struct path_layout pl;
806
807         get_path_layout(&pl, vecs->pathvec);
808         reply = MALLOC(MAX_REPLY_LEN);
809
810         if (!reply)
811                 return 1;
812
813         c = reply;
814
815         if (VECTOR_SIZE(vecs->pathvec) > 0)
816                 c += snprint_path_header(c, reply + MAX_REPLY_LEN - c,
817                                          PRINT_PATH_CHECKER, &pl);
818
819         vector_foreach_slot(vecs->pathvec, pp, i)
820                 c += snprint_path(c, reply + MAX_REPLY_LEN - c,
821                                   PRINT_PATH_CHECKER, pp, &pl);
822
823         *r = reply;
824         *len = (int)(c - reply + 1);
825         return 0;
826 }
827
828 int
829 show_maps (char ** r, int *len, struct vectors * vecs)
830 {
831         int i;
832         struct multipath * mpp;
833         char * c;
834         char * reply;
835         struct map_layout ml;
836
837         get_map_layout(&ml, vecs->mpvec);
838         reply = MALLOC(MAX_REPLY_LEN);
839
840         if (!reply)
841                 return 1;
842
843         c = reply;
844         if (VECTOR_SIZE(vecs->mpvec) > 0)
845                 c += snprint_map_header(c, reply + MAX_REPLY_LEN - c,
846                                         PRINT_MAP_FAILBACK, &ml);
847
848         vector_foreach_slot(vecs->mpvec, mpp, i)
849                 c += snprint_map(c, reply + MAX_REPLY_LEN - c,
850                                  PRINT_MAP_FAILBACK, mpp, &ml);
851
852         *r = reply;
853         *len = (int)(c - reply + 1);
854         return 0;
855 }
856
857 int
858 dump_pathvec (char ** r, int * len, struct vectors * vecs)
859 {
860         int i;
861         struct path * pp;
862         char * reply;
863         char * p;
864
865         *len = VECTOR_SIZE(vecs->pathvec) * sizeof(struct path);
866         reply = (char *)MALLOC(*len);
867         *r = reply;
868
869         if (!reply)
870                 return 1;
871
872         p = reply;
873
874         vector_foreach_slot (vecs->pathvec, pp, i) {
875                 memcpy((void *)p, pp, sizeof(struct path));
876                 p += sizeof(struct path);
877         }
878
879         /* return negative to hint caller not to add "ok" to the dump */
880         return -1;
881 }
882
883 static int
884 map_discovery (struct vectors * vecs)
885 {
886         int i;
887         struct multipath * mpp;
888
889         if (dm_get_maps(vecs->mpvec, "multipath"))
890                 return 1;
891
892         vector_foreach_slot (vecs->mpvec, mpp, i) {
893                 if (setup_multipath(vecs, mpp))
894                         return 1;
895                 start_waiter_thread(mpp, vecs);
896         }
897
898         return 0;
899 }
900
901 int
902 reconfigure (struct vectors * vecs)
903 {
904         struct config * old = conf;
905         struct multipath * mpp;
906         struct path * pp;
907         int i;
908
909         conf = NULL;
910
911         if (load_config(DEFAULT_CONFIGFILE)) {
912                 conf = old;
913                 condlog(2, "reconfigure failed, continue with old config");
914                 return 1;
915         }
916         conf->verbosity = old->verbosity;
917         free_config(old);
918
919         vector_foreach_slot (vecs->mpvec, mpp, i) {
920                 mpp->mpe = find_mpe(mpp->wwid);
921                 mpp->hwe = extract_hwe_from_path(mpp);
922                 adopt_paths(vecs, mpp);
923                 set_no_path_retry(mpp);
924         }
925         vector_foreach_slot (vecs->pathvec, pp, i) {
926                 select_checkfn(pp);
927                 select_getuid(pp);
928                 select_getprio(pp);
929         }
930         condlog(2, "reconfigured");
931         return 0;
932 }
933
934 int
935 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
936 {
937         struct vectors * vecs;
938         int r;
939         
940         *reply = NULL;
941         *len = 0;
942         vecs = (struct vectors *)trigger_data;
943
944         pthread_cleanup_push(cleanup_lock, vecs->lock);
945         lock(vecs->lock);
946
947         r = parse_cmd(str, reply, len, vecs);
948
949         if (r > 0) {
950                 *reply = STRDUP("fail\n");
951                 *len = strlen(*reply) + 1;
952                 r = 1;
953         }
954         else if (!r && *len == 0) {
955                 *reply = STRDUP("ok\n");
956                 *len = strlen(*reply) + 1;
957                 r = 0;
958         }
959         /* else if (r < 0) leave *reply alone */
960
961         lock_cleanup_pop(vecs->lock);
962         return r;
963 }
964
/*
 * Tell whether a uevent should be ignored.
 *
 * devpath: sysfs device path carried by the event
 *
 * Returns 1 (discard) when devpath is not of the form "/block/<name>" or
 * has a further "/<name>" component, 0 otherwise.
 */
static int
uev_discard(char * devpath)
{
	/*
	 * "%10s" and "%10[^/]" store up to 10 characters plus the
	 * terminating NUL, so each buffer needs 11 bytes
	 */
	char a[11], b[11];

	/*
	 * keep only block devices, discard partitions
	 */
	if (sscanf(devpath, "/block/%10s", a) != 1 ||
	    sscanf(devpath, "/block/%10[^/]/%10s", a, b) == 2) {
		condlog(4, "discard event on %s", devpath);
		return 1;
	}
	return 0;
}
980
981 int 
982 uev_trigger (struct uevent * uev, void * trigger_data)
983 {
984         int r = 0;
985         char devname[32];
986         struct vectors * vecs;
987
988         vecs = (struct vectors *)trigger_data;
989
990         if (uev_discard(uev->devpath))
991                 goto out;
992
993         basename(uev->devpath, devname);
994         lock(vecs->lock);
995
996         /*
997          * device map add/remove event
998          */
999         if (!strncmp(devname, "dm-", 3)) {
1000                 if (!strncmp(uev->action, "add", 3)) {
1001                         r = uev_add_map(devname, vecs);
1002                         goto out;
1003                 }
1004 #if 0
1005                 if (!strncmp(uev->action, "remove", 6)) {
1006                         r = uev_remove_map(devname, vecs);
1007                         goto out;
1008                 }
1009 #endif
1010                 goto out;
1011         }
1012         
1013         /*
1014          * path add/remove event
1015          */
1016         if (blacklist(conf->blist, devname))
1017                 goto out;
1018
1019         if (!strncmp(uev->action, "add", 3)) {
1020                 r = uev_add_path(devname, vecs);
1021                 goto out;
1022         }
1023         if (!strncmp(uev->action, "remove", 6)) {
1024                 r = uev_remove_path(devname, vecs);
1025                 goto out;
1026         }
1027
1028 out:
1029         unlock(vecs->lock);
1030         return r;
1031 }
1032
1033 static void *
1034 ueventloop (void * ap)
1035 {
1036         if (uevent_listen(&uev_trigger, ap))
1037                 fprintf(stderr, "error starting uevent listener");
1038                 
1039         return NULL;
1040 }
1041
1042 static void *
1043 uxlsnrloop (void * ap)
1044 {
1045         if (load_keys())
1046                 return NULL;
1047         
1048         if (alloc_handlers())
1049                 return NULL;
1050
1051         add_handler(LIST+PATHS, cli_list_paths);
1052         add_handler(LIST+MAPS, cli_list_maps);
1053         add_handler(ADD+PATH, cli_add_path);
1054         add_handler(DEL+PATH, cli_del_path);
1055         add_handler(ADD+MAP, cli_add_map);
1056         add_handler(DEL+MAP, cli_del_map);
1057         add_handler(SWITCH+MAP+GROUP, cli_switch_group);
1058         add_handler(DUMP+PATHVEC, cli_dump_pathvec);
1059         add_handler(RECONFIGURE, cli_reconfigure);
1060         add_handler(SUSPEND+MAP, cli_suspend);
1061         add_handler(RESUME+MAP, cli_resume);
1062         add_handler(REINSTATE+PATH, cli_reinstate);
1063         add_handler(FAIL+PATH, cli_fail);
1064
1065         uxsock_listen(&uxsock_trigger, ap);
1066
1067         return NULL;
1068 }
1069
1070 static int
1071 exit_daemon (int status)
1072 {
1073         if (status != 0)
1074                 fprintf(stderr, "bad exit status. see daemon.log\n");
1075
1076         condlog(3, "unlink pidfile");
1077         unlink(DEFAULT_PIDFILE);
1078
1079         lock(&exit_mutex);
1080         pthread_cond_signal(&exit_cond);
1081         unlock(&exit_mutex);
1082
1083         return status;
1084 }
1085
1086 static void
1087 fail_path (struct path * pp)
1088 {
1089         if (!pp->mpp)
1090                 return;
1091
1092         condlog(2, "checker failed path %s in map %s",
1093                  pp->dev_t, pp->mpp->alias);
1094
1095         dm_fail_path(pp->mpp->alias, pp->dev_t);
1096         update_queue_mode_del_path(pp->mpp);
1097 }
1098
1099 /*
1100  * caller must have locked the path list before calling that function
1101  */
1102 static void
1103 reinstate_path (struct path * pp)
1104 {
1105         if (!pp->mpp)
1106                 return;
1107
1108         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
1109                 condlog(0, "%s: reinstate failed", pp->dev_t);
1110         else {
1111                 condlog(2, "%s: reinstated", pp->dev_t);
1112                 update_queue_mode_add_path(pp->mpp);
1113         }
1114 }
1115
1116 static void
1117 enable_group(struct path * pp)
1118 {
1119         struct pathgroup * pgp;
1120
1121         /*
1122          * if path is added through uev_add_path, pgindex can be unset.
1123          * next update_strings() will set it, upon map reload event.
1124          *
1125          * we can safely return here, because upon map reload, all
1126          * PG will be enabled.
1127          */
1128         if (!pp->pgindex)
1129                 return;
1130
1131         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1132         
1133         if (pgp->status == PGSTATE_DISABLED) {
1134                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1135                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1136         }
1137 }
1138
1139 static void
1140 mpvec_garbage_collector (struct vectors * vecs)
1141 {
1142         struct multipath * mpp;
1143         int i;
1144
1145         vector_foreach_slot (vecs->mpvec, mpp, i) {
1146                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1147                         condlog(2, "%s: remove dead map", mpp->alias);
1148                         remove_map(mpp, vecs);
1149                         i--;
1150                 }
1151         }
1152 }
1153
1154 static void
1155 defered_failback_tick (vector mpvec)
1156 {
1157         struct multipath * mpp;
1158         int i;
1159
1160         vector_foreach_slot (mpvec, mpp, i) {
1161                 /*
1162                  * defered failback getting sooner
1163                  */
1164                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1165                         mpp->failback_tick--;
1166
1167                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1168                                 switch_pathgroup(mpp);
1169                 }
1170         }
1171 }
1172
1173 static void
1174 retry_count_tick(vector mpvec)
1175 {
1176         struct multipath *mpp;
1177         int i;
1178
1179         vector_foreach_slot (mpvec, mpp, i) {
1180                 if (mpp->retry_tick) {
1181                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1182                         if(--mpp->retry_tick == 0) {
1183                                 dm_queue_if_no_path(mpp->alias, 0);
1184                                 condlog(2, "%s: Disable queueing", mpp->alias);
1185                         }
1186                 }
1187         }
1188 }
1189
1190 static void *
1191 checkerloop (void *ap)
1192 {
1193         struct vectors *vecs;
1194         struct path *pp;
1195         int i, count = 0;
1196         int newstate;
1197         char checker_msg[MAX_CHECKER_MSG_SIZE];
1198
1199         mlockall(MCL_CURRENT | MCL_FUTURE);
1200
1201         memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
1202         vecs = (struct vectors *)ap;
1203
1204         condlog(2, "path checkers start up");
1205
1206         /*
1207          * init the path check interval
1208          */
1209         vector_foreach_slot (vecs->pathvec, pp, i) {
1210                 pp->checkint = conf->checkint;
1211         }
1212
1213         while (1) {
1214                 pthread_cleanup_push(cleanup_lock, vecs->lock);
1215                 lock(vecs->lock);
1216                 condlog(4, "tick");
1217
1218                 vector_foreach_slot (vecs->pathvec, pp, i) {
1219                         if (!pp->mpp)
1220                                 continue;
1221
1222                         if (pp->tick && --pp->tick)
1223                                 continue; /* don't check this path yet */
1224
1225                         /*
1226                          * provision a next check soonest,
1227                          * in case we exit abnormaly from here
1228                          */
1229                         pp->tick = conf->checkint;
1230                         
1231                         if (!pp->checkfn) {
1232                                 pathinfo(pp, conf->hwtable, DI_SYSFS);
1233                                 select_checkfn(pp);
1234                         }
1235
1236                         if (!pp->checkfn) {
1237                                 condlog(0, "%s: checkfn is void", pp->dev);
1238                                 continue;
1239                         }
1240                         newstate = pp->checkfn(pp->fd, checker_msg,
1241                                                &pp->checker_context);
1242                         
1243                         if (newstate < 0) {
1244                                 condlog(2, "%s: unusable path", pp->dev);
1245                                 pathinfo(pp, conf->hwtable, 0);
1246                                 continue;
1247                         }
1248
1249                         if (newstate != pp->state) {
1250                                 pp->state = newstate;
1251                                 LOG_MSG(1, checker_msg);
1252
1253                                 /*
1254                                  * upon state change, reset the checkint
1255                                  * to the shortest delay
1256                                  */
1257                                 pp->checkint = conf->checkint;
1258
1259                                 if (newstate == PATH_DOWN ||
1260                                     newstate == PATH_SHAKY) {
1261                                         /*
1262                                          * proactively fail path in the DM
1263                                          */
1264                                         fail_path(pp);
1265
1266                                         /*
1267                                          * cancel scheduled failback
1268                                          */
1269                                         pp->mpp->failback_tick = 0;
1270
1271                                         continue;
1272                                 }
1273
1274                                 /*
1275                                  * reinstate this path
1276                                  */
1277                                 reinstate_path(pp);
1278
1279                                 /*
1280                                  * need to switch group ?
1281                                  */
1282                                 update_multipath_strings(pp->mpp,
1283                                                          vecs->pathvec);
1284
1285                                 /*
1286                                  * schedule defered failback
1287                                  */
1288                                 if (pp->mpp->pgfailback > 0)
1289                                         pp->mpp->failback_tick =
1290                                                 pp->mpp->pgfailback + 1;
1291                                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
1292                                     need_switch_pathgroup(pp->mpp, 1))
1293                                         switch_pathgroup(pp->mpp);
1294
1295                                 /*
1296                                  * if at least one path is up in a group, and
1297                                  * the group is disabled, re-enable it
1298                                  */
1299                                 if (newstate == PATH_UP)
1300                                         enable_group(pp);
1301                         }
1302                         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1303                                 LOG_MSG(4, checker_msg);
1304                                 /*
1305                                  * double the next check delay.
1306                                  * max at conf->max_checkint
1307                                  */
1308                                 if (pp->checkint < (conf->max_checkint / 2))
1309                                         pp->checkint = 2 * pp->checkint;
1310                                 else
1311                                         pp->checkint = conf->max_checkint;
1312
1313                                 pp->tick = pp->checkint;
1314                                 condlog(4, "%s: delay next check %is",
1315                                                 pp->dev_t, pp->tick);
1316
1317                         }
1318                         pp->state = newstate;
1319
1320                         /*
1321                          * path prio refreshing
1322                          */
1323                         condlog(4, "path prio refresh");
1324                         pathinfo(pp, conf->hwtable, DI_PRIO);
1325
1326                         if (need_switch_pathgroup(pp->mpp, 0)) {
1327                                 if (pp->mpp->pgfailback > 0)
1328                                         pp->mpp->failback_tick =
1329                                                 pp->mpp->pgfailback + 1;
1330                                 else if (pp->mpp->pgfailback ==
1331                                                 -FAILBACK_IMMEDIATE)
1332                                         switch_pathgroup(pp->mpp);
1333                         }
1334                 }
1335                 defered_failback_tick(vecs->mpvec);
1336                 retry_count_tick(vecs->mpvec);
1337
1338                 if (count)
1339                         count--;
1340                 else {
1341                         condlog(4, "map garbage collection");
1342                         mpvec_garbage_collector(vecs);
1343                         count = MAPGCINT;
1344                 }
1345                 
1346                 lock_cleanup_pop(vecs->lock);
1347                 sleep(1);
1348         }
1349         return NULL;
1350 }
1351
1352 static struct vectors *
1353 init_paths (void)
1354 {
1355         struct vectors * vecs;
1356
1357         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1358
1359         if (!vecs)
1360                 return NULL;
1361
1362         vecs->lock = 
1363                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1364
1365         if (!vecs->lock)
1366                 goto out;
1367
1368         vecs->pathvec = vector_alloc();
1369
1370         if (!vecs->pathvec)
1371                 goto out1;
1372                 
1373         vecs->mpvec = vector_alloc();
1374
1375         if (!vecs->mpvec)
1376                 goto out2;
1377         
1378         pthread_mutex_init(vecs->lock, NULL);
1379
1380         return vecs;
1381
1382 out2:
1383         vector_free(vecs->pathvec);
1384 out1:
1385         FREE(vecs->lock);
1386 out:
1387         FREE(vecs);
1388         condlog(0, "failed to init paths");
1389         return NULL;
1390 }
1391
/*
 * Install func as the handler of signal signo, with an empty signal
 * mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when
 * sigaction() fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction act;
	struct sigaction prev;

	act.sa_handler = func;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(signo, &act, &prev) < 0)
		return (SIG_ERR);

	return (prev.sa_handler);
}
1410
/*
 * SIGHUP handler: the signal is only logged. Debug builds (_DEBUG_)
 * additionally call dbg_free_final().
 */
static void
sighup (int sig)
{
	(void)sig;	/* the signal number is not used */

	condlog(3, "SIGHUP received");
#ifdef _DEBUG_
	dbg_free_final(NULL);
#endif
}
1420
/*
 * Termination signal handler: start the daemon shutdown with a
 * success status.
 */
static void
sigend (int sig)
{
	(void)sig;	/* the signal number is not used */

	exit_daemon(0);
}
1426
1427 static void
1428 signal_init(void)
1429 {
1430         signal_set(SIGHUP, sighup);
1431         signal_set(SIGINT, sigend);
1432         signal_set(SIGTERM, sigend);
1433         signal_set(SIGKILL, sigend);
1434 }
1435
/*
 * Ask for the SCHED_RR scheduling policy at priority 99 for the
 * calling process. A refusal is logged and otherwise ignored.
 */
static void
setscheduler (void)
{
	static struct sched_param rr_prio = {
		.sched_priority = 99
	};

	if (sched_setscheduler (0, SCHED_RR, &rr_prio) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1450
/*
 * Write val, as a decimal integer, to /proc/self/oom_adj.
 * Nothing happens when that file can not be opened for writing.
 */
static void
set_oom_adj (int val)
{
	FILE *oom = fopen("/proc/self/oom_adj", "w");

	if (oom) {
		fprintf(oom, "%i", val);
		fclose(oom);
	}
}
1464         
1465 static int
1466 child (void * param)
1467 {
1468         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1469         pthread_attr_t attr;
1470         struct vectors * vecs;
1471
1472         mlockall(MCL_CURRENT | MCL_FUTURE);
1473
1474         if (logsink)
1475                 log_thread_start();
1476
1477         condlog(2, "--------start up--------");
1478         condlog(2, "read " DEFAULT_CONFIGFILE);
1479
1480         if (load_config(DEFAULT_CONFIGFILE))
1481                 exit(1);
1482
1483         setlogmask(LOG_UPTO(conf->verbosity + 3));
1484
1485         /*
1486          * fill the voids left in the config file
1487          */
1488         if (!conf->checkint) {
1489                 conf->checkint = CHECKINT;
1490                 conf->max_checkint = MAX_CHECKINT;
1491         }
1492
1493         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1494                 if (logsink)
1495                         log_thread_stop();
1496
1497                 exit(1);
1498         }
1499         signal_init();
1500         setscheduler();
1501         set_oom_adj(-17);
1502         vecs = init_paths();
1503
1504         if (!vecs)
1505                 exit(1);
1506
1507         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1508                 condlog(0, "can not find sysfs mount point");
1509                 exit(1);
1510         }
1511
1512         /*
1513          * fetch paths and multipaths lists
1514          * no paths and/or no multipaths are valid scenarii
1515          * vectors maintenance will be driven by events
1516          */
1517         path_discovery(vecs->pathvec, conf, DI_SYSFS | DI_WWID | DI_CHECKER);
1518         map_discovery(vecs);
1519
1520         /*
1521          * start threads
1522          */
1523         pthread_attr_init(&attr);
1524         pthread_attr_setstacksize(&attr, 64 * 1024);
1525         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
1526         
1527         pthread_create(&check_thr, &attr, checkerloop, vecs);
1528         pthread_create(&uevent_thr, &attr, ueventloop, vecs);
1529         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, vecs);
1530
1531         pthread_cond_wait(&exit_cond, &exit_mutex);
1532
1533         /*
1534          * exit path
1535          */
1536         lock(vecs->lock);
1537         remove_maps(vecs);
1538         free_pathvec(vecs->pathvec, FREE_PATHS);
1539
1540         pthread_cancel(check_thr);
1541         pthread_cancel(uevent_thr);
1542         pthread_cancel(uxlsnr_thr);
1543
1544         free_keys(keys);
1545         keys = NULL;
1546         free_handlers(handlers);
1547         handlers = NULL;
1548         free_polls();
1549
1550         unlock(vecs->lock);
1551         pthread_mutex_destroy(vecs->lock);
1552         FREE(vecs->lock);
1553         vecs->lock = NULL;
1554         FREE(vecs);
1555         vecs = NULL;
1556         free_config(conf);
1557         conf = NULL;
1558
1559         condlog(2, "--------shut down-------");
1560         
1561         if (logsink)
1562                 log_thread_stop();
1563
1564 #ifdef _DEBUG_
1565         dbg_free_final(NULL);
1566 #endif
1567
1568         exit(0);
1569 }
1570
/*
 * Detach from the controlling terminal with the usual double fork.
 *
 * Returns:
 *   -1  when the first fork failed,
 *   >0  in the original process (pid of the first child),
 *    0  in the process that goes on as the daemon.
 *
 * The intermediate process starts a new session, forks again and
 * leaves with _exit(0). A failure of the second fork is only
 * reported: the intermediate process then goes on as the daemon.
 *
 * In the daemon, stdin is redirected to /dev/null, stdout and stderr
 * to /dev/console, the working directory is set to / and the umask
 * is cleared. Failing to open either device ends the process with
 * _exit(0).
 */
static int
daemonize(void)
{
	int pid;
	int in_fd, out_fd;

	if( (pid = fork()) < 0){
		fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
		return -1;
	}
	else if (pid != 0)
		return pid;

	setsid();

	if ( (pid = fork()) < 0)
		fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
	else if (pid != 0)
		_exit(0);

	in_fd = open("/dev/null", O_RDONLY);
	if (in_fd < 0){
		fprintf(stderr, "cannot open /dev/null for input : %s\n",
			strerror(errno));
		_exit(0);
	}
	out_fd = open("/dev/console", O_WRONLY);
	if (out_fd < 0){
		fprintf(stderr, "cannot open /dev/console for output : %s\n",
			strerror(errno));
		_exit(0);
	}

	/*
	 * dup2() replaces the target descriptor in one step, and is a
	 * no-op when source and target are the same descriptor. That
	 * keeps the redirection correct even if the daemon was started
	 * with some standard descriptor closed, in which case open()
	 * above may have handed back 0, 1 or 2.
	 */
	if (dup2(in_fd, STDIN_FILENO) < 0 ||
	    dup2(out_fd, STDOUT_FILENO) < 0 ||
	    dup2(out_fd, STDERR_FILENO) < 0)
		fprintf(stderr, "cannot redirect standard streams : %s\n",
			strerror(errno));

	/*
	 * only drop the temporary descriptors, never a standard one
	 */
	if (in_fd > STDERR_FILENO)
		close(in_fd);
	if (out_fd > STDERR_FILENO)
		close(out_fd);

	chdir("/");
	umask(0);
	return 0;
}
1617
1618 int
1619 main (int argc, char *argv[])
1620 {
1621         extern char *optarg;
1622         extern int optind;
1623         int arg;
1624         int err;
1625         
1626         logsink = 1;
1627
1628         if (getuid() != 0) {
1629                 fprintf(stderr, "need to be root\n");
1630                 exit(1);
1631         }
1632
1633         /* make sure we don't lock any path */
1634         chdir("/");
1635         umask(umask(077) | 022);
1636
1637         conf = alloc_config();
1638
1639         if (!conf)
1640                 exit(1);
1641
1642         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1643         switch(arg) {
1644                 case 'd':
1645                         logsink = 0;
1646                         //debug=1; /* ### comment me out ### */
1647                         break;
1648                 case 'v':
1649                         if (sizeof(optarg) > sizeof(char *) ||
1650                             !isdigit(optarg[0]))
1651                                 exit(1);
1652
1653                         conf->verbosity = atoi(optarg);
1654                         break;
1655                 case 'k':
1656                         uxclnt(optarg);
1657                         exit(0);
1658                 default:
1659                         ;
1660                 }
1661         }
1662
1663         if (!logsink)
1664                 err = 0;
1665         else
1666                 err = daemonize();
1667         
1668         if (err < 0)
1669                 /* error */
1670                 exit(1);
1671         else if (err > 0)
1672                 /* parent dies */
1673                 exit(0);
1674         else
1675                 /* child lives */
1676                 return (child(NULL));
1677 }