[multipathd] race bugs
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 #include <string.h>
2 #include <pthread.h>
3 #include <stdio.h>
4 #include <unistd.h>
5 #include <linux/unistd.h>
6 #include <stdlib.h>
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <fcntl.h>
10 #include <libdevmapper.h>
11 #include <signal.h>
12 #include <wait.h>
13 #include <sched.h>
14 #include <errno.h>
15 #include <sys/mount.h>
16 #include <sys/mman.h>
17
18 /*
19  * libsysfs
20  */
21 #include <sysfs/libsysfs.h>
22 #include <sysfs/dlist.h>
23
24 /*
25  * libcheckers
26  */
27 #include <checkers.h>
28 #include <path_state.h>
29
30 /*
31  * libmultipath
32  */
33 #include <parser.h>
34 #include <vector.h>
35 #include <memory.h>
36 #include <config.h>
37 #include <callout.h>
38 #include <util.h>
39 #include <blacklist.h>
40 #include <hwtable.h>
41 #include <defaults.h>
42 #include <structs.h>
43 #include <dmparser.h>
44 #include <devmapper.h>
45 #include <dict.h>
46 #include <discovery.h>
47 #include <debug.h>
48 #include <propsel.h>
49 #include <uevent.h>
50 #include <switchgroup.h>
51 #include <path_state.h>
52
53 #include "main.h"
54 #include "copy.h"
55 #include "pidfile.h"
56 #include "uxlsnr.h"
57 #include "uxclnt.h"
58 #include "cli.h"
59 #include "cli_handlers.h"
60
61 #define FILE_NAME_SIZE 256
62 #define CMDSIZE 160
63
/*
 * Log a pending checker message for the path "pp" in scope at the call
 * site, then wipe the message buffer so it is not logged twice.
 *
 * a: condlog verbosity level
 * b: message buffer of MAX_CHECKER_MSG_SIZE bytes; nothing is logged
 *    when it holds an empty string
 *
 * Wrapped in do { } while (0) so the macro is a single statement and can
 * be used safely as the body of an unbraced if/else.
 */
#define LOG_MSG(a,b) \
	do { \
		if (strlen(b)) { \
			condlog(a, "%s: %s", pp->dev_t, b); \
			memset(b, 0, MAX_CHECKER_MSG_SIZE); \
		} \
	} while (0)
69
/*
 * Mutex helpers. When LCKDBG is defined every lock/unlock is traced on
 * stderr together with its call site.
 *
 * The tracing variants are wrapped in do { } while (0): as two bare
 * statements, "if (x) lock(m);" would only guard the fprintf and take
 * the mutex unconditionally.
 */
#ifdef LCKDBG
#define lock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) lock %p\n", \
			__FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
		pthread_mutex_lock(a); \
	} while (0)
#define unlock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) unlock %p\n", \
			__FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
		pthread_mutex_unlock(a); \
	} while (0)
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#endif
81
/*
 * Shutdown notification: exit_daemon() signals exit_cond while holding
 * exit_mutex.
 */
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
84
85 /*
86  * structs
87  */
88 struct event_thread {
89         struct dm_task *dmt;
90         pthread_t thread;
91         int event_nr;
92         char mapname[WWID_SIZE];
93         struct paths *allpaths;
94 };
95
96 static struct event_thread *
97 alloc_waiter (void)
98 {
99
100         struct event_thread * wp;
101
102         wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
103
104         return wp;
105 }
106
/*
 * pthread cleanup handler: release the mutex passed as "data".
 */
static void
cleanup_lock (void * data)
{
	pthread_mutex_t * mutex = (pthread_mutex_t *)data;

	unlock(mutex);
}
112
113 static void
114 set_paths_owner (struct paths * allpaths, struct multipath * mpp)
115 {
116         int i;
117         struct path * pp;
118
119         if (!mpp)
120                 return;
121
122         vector_foreach_slot (allpaths->pathvec, pp, i) {
123                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
124                         condlog(4, "%s ownership set", pp->dev_t);
125                         pp->mpp = mpp;
126                 }
127         }
128 }
129
130 static void
131 unset_paths_owner (struct paths * allpaths, struct multipath * mpp)
132 {
133         int i;
134         struct path * pp;
135
136         vector_foreach_slot (allpaths->pathvec, pp, i) {
137                 if (pp->mpp == mpp) {
138                         condlog(4, "%s is orphaned", pp->dev_t);
139                         pp->mpp = NULL;
140                 }
141         }
142 }
143
144 static int
145 update_multipath_table (struct multipath *mpp, vector pathvec)
146 {
147         if (!mpp)
148                 return 1;
149
150         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
151                 return 1;
152
153         if (disassemble_map(pathvec, mpp->params, mpp))
154                 return 1;
155
156         return 0;
157 }
158
159 static int
160 update_multipath_status (struct multipath *mpp)
161 {
162         if (!mpp)
163                 return 1;
164
165         if(dm_get_status(mpp->alias, mpp->status))
166                 return 1;
167
168         if (disassemble_status(mpp->status, mpp))
169                 return 1;
170
171         return 0;
172 }
173
174 static int
175 update_multipath_strings (struct multipath *mpp, vector pathvec)
176 {
177         if (mpp->selector) {
178                 FREE(mpp->selector);
179                 mpp->selector = NULL;
180         }
181
182         if (mpp->features) {
183                 FREE(mpp->features);
184                 mpp->features = NULL;
185         }
186
187         if (mpp->hwhandler) {
188                 FREE(mpp->hwhandler);
189                 mpp->hwhandler = NULL;
190         }
191
192         free_pgvec(mpp->pg, KEEP_PATHS);
193         mpp->pg = NULL;
194
195         if (update_multipath_table(mpp, pathvec))
196                 return 1;
197
198         if (update_multipath_status(mpp))
199                 return 1;
200
201         return 0;
202 }
203
204 static int
205 setup_multipath (struct paths * allpaths, struct multipath * mpp)
206 {
207         char * wwid;
208         int i;
209
210         wwid = get_mpe_wwid(mpp->alias);
211
212         if (wwid) {
213                 strncpy(mpp->wwid, wwid, WWID_SIZE);
214                 wwid = NULL;
215         } else
216                 strncpy(mpp->wwid, mpp->alias, WWID_SIZE);
217
218         condlog(4, "discovered map %s", mpp->alias);
219
220         if (update_multipath_strings(mpp, allpaths->pathvec))
221                 goto out;
222
223         set_paths_owner(allpaths, mpp);
224         mpp->mpe = find_mpe(mpp->wwid);
225         select_pgfailback(mpp);
226
227         return 0;
228 out:
229         /*
230          * purge the multipath vector
231          */
232         if ((i = find_slot(allpaths->mpvec, (void *)mpp)) != -1)
233                 vector_del_slot(allpaths->mpvec, i);
234
235         free_multipath(mpp, KEEP_PATHS);
236         condlog(0, "failed to setup multipath");
237         return 1;
238 }
239
240 static void
241 switch_pathgroup (struct multipath * mpp)
242 {
243         struct pathgroup * pgp;
244         struct path * pp;
245         int i, j;
246         
247         if (!mpp || mpp->pgfailback == FAILBACK_MANUAL)
248                 return;
249         /*
250          * Refresh path priority values
251          */
252         vector_foreach_slot (mpp->pg, pgp, i)
253                 vector_foreach_slot (pgp->paths, pp, j)
254                         pathinfo(pp, conf->hwtable, DI_PRIO);
255
256         select_path_group(mpp); /* sets mpp->nextpg */
257         pgp = VECTOR_SLOT(mpp->pg, mpp->nextpg - 1);
258         
259         if (pgp && pgp->status != PGSTATE_ACTIVE) {
260                 dm_switchgroup(mpp->alias, mpp->nextpg);
261                 condlog(2, "%s: switch to path group #%i",
262                          mpp->alias, mpp->nextpg);
263         }
264 }
265
266 static int
267 update_multipath (struct paths *allpaths, char *mapname)
268 {
269         struct multipath *mpp;
270         struct pathgroup  *pgp;
271         struct path *pp;
272         int i, j;
273         int r = 1;
274
275         mpp = find_mp(allpaths->mpvec, mapname);
276
277         if (!mpp)
278                 goto out;
279
280         free_pgvec(mpp->pg, KEEP_PATHS);
281         mpp->pg = NULL;
282
283         if (setup_multipath(allpaths, mpp))
284                 goto out; /* mpp freed in setup_multipath */
285
286         /*
287          * compare checkers states with DM states
288          */
289         vector_foreach_slot (mpp->pg, pgp, i) {
290                 vector_foreach_slot (pgp->paths, pp, j) {
291                         if (pp->dmstate != PSTATE_FAILED)
292                                 continue;
293
294                         if (pp->state != PATH_DOWN) {
295                                 condlog(2, "%s: mark as failed", pp->dev_t);
296                                 pp->state = PATH_DOWN;
297
298                                 /*
299                                  * if opportune,
300                                  * schedule the next check earlier
301                                  */
302                                 if (pp->tick > conf->checkint)
303                                         pp->tick = conf->checkint;
304                         }
305                 }
306         }
307         r = 0;
308 out:
309         if (r)
310                 condlog(0, "failed to update multipath");
311
312         return r;
313 }
314
315 static void
316 free_waiter (void * data)
317 {
318         struct event_thread * wp = (struct event_thread *)data;
319
320         if (wp->dmt)
321                 dm_task_destroy(wp->dmt);
322         FREE(wp);
323 }
324
/*
 * Unblock SIGUSR1 for the calling thread.
 *
 * Returns the signal mask that was in effect before the call, so the
 * caller can restore it with pthread_sigmask(SIG_SETMASK, ...).
 */
static sigset_t unblock_sigusr1(void)
{
	sigset_t usr1_only;
	sigset_t previous;

	sigemptyset(&usr1_only);
	sigaddset(&usr1_only, SIGUSR1);
	pthread_sigmask(SIG_UNBLOCK, &usr1_only, &previous);

	return previous;
}
334
335 /*
336  * returns the reschedule delay
337  * negative means *stop*
338  */
339 static int
340 waiteventloop (struct event_thread * waiter)
341 {
342         sigset_t set;
343         int event_nr;
344         int r;
345
346         if (!waiter->event_nr)
347                 waiter->event_nr = dm_geteventnr(waiter->mapname);
348
349         if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
350                 return 1;
351
352         if (!dm_task_set_name(waiter->dmt, waiter->mapname))
353                 return 1;
354
355         if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
356                                                       waiter->event_nr))
357                 return 1;
358
359         dm_task_no_open_count(waiter->dmt);
360
361         set = unblock_sigusr1();
362         dm_task_run(waiter->dmt);
363         pthread_sigmask(SIG_SETMASK, &set, NULL);
364         pthread_testcancel();
365         dm_task_destroy(waiter->dmt);
366         waiter->dmt = NULL;
367
368         waiter->event_nr++;
369
370         /*
371          * upon event ...
372          */
373         while (1) {
374                 condlog(2, "devmap event (%i) on %s",
375                                 waiter->event_nr, waiter->mapname);
376
377                 /*
378                  * event might be :
379                  *
380                  * 1) a table reload, which means our mpp structure is
381                  *    obsolete : refresh it through update_multipath()
382                  * 2) a path failed by DM : mark as such through
383                  *    update_multipath()
384                  * 3) map has gone away : stop the thread.
385                  * 4) a path reinstate : nothing to do
386                  * 5) a switch group : nothing to do
387                  */
388                 pthread_cleanup_push(cleanup_lock, waiter->allpaths->lock);
389                 lock(waiter->allpaths->lock);
390
391                 r = update_multipath(waiter->allpaths, waiter->mapname);
392                 pthread_cleanup_pop(1);
393
394                 if (r)
395                         return -1; /* stop the thread */
396
397                 event_nr = dm_geteventnr(waiter->mapname);
398
399                 if (waiter->event_nr == event_nr)
400                         return 1; /* upon problem reschedule 1s later */
401
402                 waiter->event_nr = event_nr;
403         }
404         return -1; /* never reach there */
405 }
406
407 static void *
408 waitevent (void * et)
409 {
410         int r;
411         struct event_thread *waiter;
412
413         mlockall(MCL_CURRENT | MCL_FUTURE);
414
415         waiter = (struct event_thread *)et;
416         pthread_cleanup_push(free_waiter, et);
417
418         while (1) {
419                 r = waiteventloop(waiter);
420
421                 if (r < 0)
422                         break;
423
424                 pthread_testcancel();
425                 sleep(r);
426                 pthread_testcancel();
427         }
428
429         pthread_cleanup_pop(1);
430         return NULL;
431 }
432
433 static int
434 stop_waiter_thread (struct multipath * mpp, struct paths * allpaths)
435 {
436         struct event_thread * wp = (struct event_thread *)mpp->waiter;
437         pthread_t thread = wp->thread;
438         int r;
439
440         if (!wp)
441                 return 1;
442
443         condlog(2, "%s: reap event checker", wp->mapname);
444
445         if ((r = pthread_cancel(thread)))
446                 return r;
447
448         pthread_kill(thread, SIGUSR1);
449         return 0;
450 }
451
452 static int
453 start_waiter_thread (struct multipath * mpp, struct paths * allpaths)
454 {
455         pthread_attr_t attr;
456         struct event_thread * wp;
457
458         if (!mpp)
459                 return 0;
460
461         if (pthread_attr_init(&attr))
462                 goto out;
463
464         pthread_attr_setstacksize(&attr, 32 * 1024);
465         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
466
467         wp = alloc_waiter();
468
469         if (!wp)
470                 goto out;
471
472         mpp->waiter = (void *)wp;
473         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
474         wp->allpaths = allpaths;
475
476         if (pthread_create(&wp->thread, &attr, waitevent, wp)) {
477                 condlog(0, "%s: cannot create event checker", wp->mapname);
478                 goto out1;
479         }
480         condlog(2, "%s: event checker started", wp->mapname);
481
482         return 0;
483 out1:
484         free_waiter(wp);
485         mpp->waiter = NULL;
486 out:
487         condlog(0, "failed to start waiter thread");
488         return 1;
489 }
490
491 static void
492 remove_map (struct multipath * mpp, struct paths * allpaths)
493 {
494         int i;
495
496         /*
497          * stop the DM event waiter thread
498          */
499         if (stop_waiter_thread(mpp, allpaths)) {
500                 condlog(0, "%s: error canceling waiter thread", mpp->alias);
501                 /*
502                  * warrior mode
503                  */
504                 free_waiter(mpp->waiter);
505         }
506
507         /*
508          * clear references to this map
509          */
510         unset_paths_owner(allpaths, mpp);
511
512         /*
513          * purge the multipath vector
514          */
515         i = find_slot(allpaths->mpvec, (void *)mpp);
516         vector_del_slot(allpaths->mpvec, i);
517
518         /*
519          * final free
520          */
521         free_multipath(mpp, KEEP_PATHS);
522         mpp = NULL;
523 }
524
525 static void
526 remove_maps (struct paths * allpaths)
527 {
528         int i;
529         struct multipath * mpp;
530
531         vector_foreach_slot (allpaths->mpvec, mpp, i)
532                 remove_map(mpp, allpaths);
533
534         vector_free(allpaths->mpvec);
535         allpaths->mpvec = NULL;
536 }
537
538 int
539 uev_add_map (char * devname, struct paths * allpaths)
540 {
541         int major, minor, i;
542         char dev_t[BLK_DEV_SIZE];
543         char * alias;
544         struct multipath * mpp;
545
546         if (sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE))
547                 return 1;
548
549         if (sscanf(dev_t, "%d:%d", &major, &minor) != 2)
550                 return 1;
551
552         alias = dm_mapname(major, minor);
553                 
554         if (!alias)
555                 return 1;
556         
557         if (!dm_type(alias, DEFAULT_TARGET)) {
558                 condlog(4, "%s: not a multipath map", alias);
559                 FREE(alias);
560                 return 0;
561         }
562
563         mpp = find_mp(allpaths->mpvec, alias);
564
565         if (mpp) {
566                 /*
567                  * this should not happen,
568                  * we missed a remove map event (not sent ?)
569                  */
570                 condlog(2, "%s: already registered", alias);
571                 remove_map(mpp, allpaths);
572         }
573
574         /*
575          * now we can allocate
576          */
577         mpp = alloc_multipath();
578
579         if (!mpp)
580                 return 1;
581
582         mpp->minor = minor;
583         mpp->alias = alias;
584
585         if (setup_multipath(allpaths, mpp))
586                 return 1; /* mpp freed in setup_multipath */
587
588         if (!vector_alloc_slot(allpaths->mpvec))
589                 goto out;
590
591         vector_set_slot(allpaths->mpvec, mpp);
592         set_paths_owner(allpaths, mpp);
593
594         if (start_waiter_thread(mpp, allpaths))
595                 goto out;
596
597         condlog(2, "add %s devmap", mpp->alias);
598
599         return 0;
600 out:
601         condlog(2, "add %s devmap failed", mpp->alias);
602         /*
603          * purge the multipath vector
604          */
605         if ((i = find_slot(allpaths->mpvec, (void *)mpp)) != -1)
606                 vector_del_slot(allpaths->mpvec, i);
607
608         free_multipath(mpp, KEEP_PATHS);
609         return 1;
610 }
611
612 int
613 uev_remove_map (char * devname, struct paths * allpaths)
614 {
615         int minor;
616         struct multipath * mpp;
617
618         if (sscanf(devname, "dm-%d", &minor) != 1)
619                 return 1;
620
621         mpp = find_mp_by_minor(allpaths->mpvec, minor);
622
623         if (!mpp) {
624                 condlog(3, "%s: devmap not registered, can't remove",
625                         devname);
626                 return 1;
627         }
628
629         condlog(2, "remove %s devmap", mpp->alias);
630         remove_map(mpp, allpaths);
631
632         return 0;
633 }
634
635 int
636 uev_add_path (char * devname, struct paths * allpaths)
637 {
638         struct path * pp;
639
640         pp = find_path_by_dev(allpaths->pathvec, devname);
641
642         if (pp) {
643                 condlog(3, "%s: already in pathvec");
644                 return 1;
645         }
646         pp = store_pathinfo(allpaths->pathvec, conf->hwtable,
647                        devname, DI_SYSFS | DI_WWID);
648
649         if (!pp) {
650                 condlog(0, "%s: failed to store path info", devname);
651                 return 1;
652         }
653
654         condlog(2, "%s: path checker registered", devname);
655         pp->mpp = find_mp_by_wwid(allpaths->mpvec, pp->wwid);
656
657         if (pp->mpp)
658                 condlog(4, "%s: ownership set to %s",
659                                 pp->dev_t, pp->mpp->alias);
660         else
661                 condlog(4, "%s: orphaned", pp->dev_t);
662
663         return 0;
664 }
665
666 int
667 uev_remove_path (char * devname, struct paths * allpaths)
668 {
669         int i;
670         struct path * pp;
671
672         pp = find_path_by_dev(allpaths->pathvec, devname);
673
674         if (!pp) {
675                 condlog(3, "%s: not in pathvec");
676                 return 1;
677         }
678         condlog(2, "remove %s path checker", devname);
679         i = find_slot(allpaths->pathvec, (void *)pp);
680         vector_del_slot(allpaths->pathvec, i);
681         free_path(pp);
682
683         return 0;
684 }
685
686 char *
687 show_paths (struct paths * allpaths)
688 {
689         int i, j, k;
690         struct path * pp;
691         char * c;
692         char * reply;
693
694         reply = MALLOC(MAX_REPLY_LEN);
695
696         if (!reply)
697                 return NULL;
698
699         c = reply;
700         c += sprintf(c, "\n");
701
702         vector_foreach_slot(allpaths->pathvec, pp, i) {
703                 c += sprintf(c, "%10s: ", pp->dev);
704
705                 if (!pp->mpp) {
706                         c += sprintf(c, "[orphan]\n");
707                         continue;
708                 }
709
710                 if (MAX_REPLY_LEN - MAX_PSTATE_LEN < 0) {
711                         FREE(reply);
712                         return NULL;
713                 }
714
715                 j = pstate_snprintf(c, MAX_PSTATE_LEN, pp->state);
716                 c += j;
717                 j = MAX_PSTATE_LEN - j;
718                 
719                 while (j--)
720                         sprintf(c, " ");
721
722                 j = pp->tick;
723                 k = pp->checkint - pp->tick;
724                 c += sprintf(c, "%3i/%3i ", j, pp->checkint);
725
726                 while (j-- > 0)
727                         c += sprintf(c, "X");
728
729
730                 while (k-- > 0)
731                         c += sprintf(c, ".");
732
733                 c += sprintf(c, "\n");
734         }
735
736         reply[MAX_REPLY_LEN - 1] = 0;
737         return reply;
738 }
739
740 char *
741 show_maps (struct paths * allpaths)
742 {
743         int i, j, k;
744         struct multipath * mpp;
745         char * c;
746         char * reply;
747
748         reply = MALLOC(MAX_REPLY_LEN);
749
750         if (!reply)
751                 return NULL;
752
753         c = reply;
754         c += sprintf(c, "\n");
755
756         vector_foreach_slot(allpaths->mpvec, mpp, i) {
757                 c += sprintf(c, "%20s: ", mpp->alias);
758
759                 if (!mpp->failback_tick) {
760                         c += sprintf(c, "[no scheduled failback]\n");
761                         continue;
762                 }
763
764                 j = mpp->failback_tick;
765                 k = mpp->pgfailback - mpp->failback_tick;
766                 c += sprintf(c, "%3i/%3i ", j, mpp->pgfailback);
767
768                 while (j-- > 0)
769                         c += sprintf(c, "X");
770
771
772                 while (k-- > 0)
773                         c += sprintf(c, ".");
774
775                 c += sprintf(c, "\n");
776         }
777
778         reply[MAX_REPLY_LEN - 1] = 0;
779         return reply;
780 }
781
782 char *
783 uxsock_trigger (char * str, void * trigger_data)
784 {
785         struct paths * allpaths;
786         char * reply = NULL;
787
788         allpaths = (struct paths *)trigger_data;
789
790         pthread_cleanup_push(cleanup_lock, allpaths->lock);
791         lock(allpaths->lock);
792
793         reply = parse_cmd(str, allpaths);
794
795         if (!reply)
796                 reply = STRDUP("fail\n");
797
798         else if (strlen(reply) == 0)
799                 reply = STRDUP("ok\n");
800
801         pthread_cleanup_pop(1);
802
803         return reply;
804 }
805
806 int 
807 uev_trigger (struct uevent * uev, void * trigger_data)
808 {
809         int r = 0;
810         char devname[32];
811         struct paths * allpaths;
812
813         allpaths = (struct paths *)trigger_data;
814         pthread_cleanup_push(cleanup_lock, allpaths->lock);
815         lock(allpaths->lock);
816
817         if (strncmp(uev->devpath, "/block", 6))
818                 goto out;
819
820         basename(uev->devpath, devname);
821
822         /*
823          * device map add/remove event
824          */
825         if (!strncmp(devname, "dm-", 3)) {
826                 if (!strncmp(uev->action, "add", 3)) {
827                         r = uev_add_map(devname, allpaths);
828                         goto out;
829                 }
830                 if (!strncmp(uev->action, "remove", 6)) {
831                         r = uev_remove_map(devname, allpaths);
832                         goto out;
833                 }
834                 goto out;
835         }
836         
837         /*
838          * path add/remove event
839          */
840         if (blacklist(conf->blist, devname))
841                 goto out;
842
843         if (!strncmp(uev->action, "add", 3)) {
844                 r = uev_add_path(devname, allpaths);
845                 goto out;
846         }
847         if (!strncmp(uev->action, "remove", 6)) {
848                 r = uev_remove_path(devname, allpaths);
849                 goto out;
850         }
851
852 out:
853         FREE(uev);
854         pthread_cleanup_pop(1);
855         return r;
856 }
857
858 static void *
859 ueventloop (void * ap)
860 {
861         uevent_listen(&uev_trigger, ap);
862
863         return NULL;
864 }
865
866 static void *
867 uxlsnrloop (void * ap)
868 {
869         if (load_keys())
870                 return NULL;
871         
872         if (alloc_handlers())
873                 return NULL;
874
875         add_handler(LIST+PATHS, list_paths);
876         add_handler(LIST+MAPS, list_maps);
877         add_handler(ADD+PATH, add_path);
878         add_handler(DEL+PATH, del_path);
879         add_handler(ADD+MAP, add_map);
880         add_handler(DEL+MAP, del_map);
881         add_handler(SWITCH+MAP+GROUP, switch_group);
882
883         uxsock_listen(&uxsock_trigger, ap);
884
885         return NULL;
886 }
887
888 static int
889 exit_daemon (int status)
890 {
891         if (status != 0)
892                 fprintf(stderr, "bad exit status. see daemon.log\n");
893
894         condlog(3, "unlink pidfile");
895         unlink(DEFAULT_PIDFILE);
896
897         condlog(2, "--------shut down-------");
898         
899         lock(&exit_mutex);
900         pthread_cond_signal(&exit_cond);
901         unlock(&exit_mutex);
902
903         return status;
904 }
905
906 /*
907  * caller must have locked the path list before calling that function
908  */
909 static int
910 get_dm_mpvec (struct paths * allpaths)
911 {
912         int i;
913         struct multipath * mpp;
914
915         if (dm_get_maps(allpaths->mpvec, "multipath"))
916                 return 1;
917
918         vector_foreach_slot (allpaths->mpvec, mpp, i) {
919                 if (setup_multipath(allpaths, mpp))
920                         return 1;
921                 mpp->minor = dm_get_minor(mpp->alias);
922                 start_waiter_thread(mpp, allpaths);
923         }
924
925         return 0;
926 }
927
928 static void
929 fail_path (struct path * pp)
930 {
931         if (!pp->mpp)
932                 return;
933
934         condlog(2, "checker failed path %s in map %s",
935                  pp->dev_t, pp->mpp->alias);
936
937         dm_fail_path(pp->mpp->alias, pp->dev_t);
938 }
939
940 /*
941  * caller must have locked the path list before calling that function
942  */
943 static void
944 reinstate_path (struct path * pp)
945 {
946         if (pp->mpp) {
947                 if (dm_reinstate(pp->mpp->alias, pp->dev_t))
948                         condlog(0, "%s: reinstate failed", pp->dev_t);
949                 else
950                         condlog(2, "%s: reinstated", pp->dev_t);
951         }
952 }
953
954 static void
955 enable_group(struct path * pp)
956 {
957         struct pathgroup * pgp;
958
959         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
960         
961         if (pgp->status == PGSTATE_DISABLED) {
962                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
963                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
964         }
965 }
966
967 static void
968 mpvec_garbage_collector (struct paths * allpaths)
969 {
970         struct multipath * mpp;
971         int i;
972
973         vector_foreach_slot (allpaths->mpvec, mpp, i) {
974                 if (!dm_map_present(mpp->alias)) {
975                         condlog(2, "%s: remove dead map", mpp->alias);
976                         remove_map(mpp, allpaths);
977                 }
978         }
979 }
980
981 static void
982 defered_failback_tick (vector mpvec)
983 {
984         struct multipath * mpp;
985         int i;
986
987         vector_foreach_slot (mpvec, mpp, i) {
988                 /*
989                  * defered failback getting sooner
990                  */
991                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
992                         mpp->failback_tick--;
993
994                         if (!mpp->failback_tick)
995                                 switch_pathgroup(mpp);
996                 }
997         }
998 }
999
1000 static void *
1001 checkerloop (void *ap)
1002 {
1003         struct paths *allpaths;
1004         struct path *pp;
1005         int i, count = 0;
1006         int newstate;
1007         char checker_msg[MAX_CHECKER_MSG_SIZE];
1008
1009         mlockall(MCL_CURRENT | MCL_FUTURE);
1010
1011         memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
1012         allpaths = (struct paths *)ap;
1013
1014         condlog(2, "path checkers start up");
1015
1016         while (1) {
1017                 pthread_cleanup_push(cleanup_lock, allpaths->lock);
1018                 lock(allpaths->lock);
1019                 condlog(4, "tick");
1020
1021                 vector_foreach_slot (allpaths->pathvec, pp, i) {
1022                         if (!pp->mpp)
1023                                 continue;
1024
1025                         if (pp->tick) {
1026                                 /*
1027                                  * don't check this path yet
1028                                  */
1029                                 pp->tick--;
1030                                 continue;
1031                         }
1032
1033                         /*
1034                          * provision a next check soonest,
1035                          * in case we exit abnormaly from here
1036                          */
1037                         pp->tick = conf->checkint;
1038                         
1039                         if (!pp->checkfn) {
1040                                 pathinfo(pp, conf->hwtable, DI_SYSFS);
1041                                 select_checkfn(pp);
1042                         }
1043
1044                         if (!pp->checkfn) {
1045                                 condlog(0, "%s: checkfn is void", pp->dev);
1046                                 continue;
1047                         }
1048                         newstate = pp->checkfn(pp->fd, checker_msg,
1049                                                &pp->checker_context);
1050                         
1051                         if (newstate != pp->state) {
1052                                 pp->state = newstate;
1053                                 LOG_MSG(1, checker_msg);
1054
1055                                 /*
1056                                  * upon state change, reset the checkint
1057                                  * to the shortest delay
1058                                  */
1059                                 pp->checkint = conf->checkint;
1060
1061                                 if (newstate == PATH_DOWN ||
1062                                     newstate == PATH_SHAKY) {
1063                                         /*
1064                                          * proactively fail path in the DM
1065                                          */
1066                                         fail_path(pp);
1067
1068                                         /*
1069                                          * cancel scheduled failback
1070                                          */
1071                                         pp->mpp->failback_tick = 0;
1072
1073                                         continue;
1074                                 }
1075
1076                                 /*
1077                                  * reinstate this path
1078                                  */
1079                                 reinstate_path(pp);
1080
1081                                 /*
1082                                  * need to switch group ?
1083                                  */
1084                                 update_multipath_strings(pp->mpp,
1085                                                          allpaths->pathvec);
1086
1087                                 /*
1088                                  * schedule defered failback
1089                                  */
1090                                 if (pp->mpp->pgfailback > 0)
1091                                         pp->mpp->failback_tick =
1092                                                 pp->mpp->pgfailback;
1093
1094                                 if (pp->mpp->pgfailback == FAILBACK_IMMEDIATE)
1095                                         switch_pathgroup(pp->mpp);
1096
1097                                 /*
1098                                  * if at least one path is up in a group, and
1099                                  * the group is disabled, re-enable it
1100                                  */
1101                                 if (newstate == PATH_UP)
1102                                         enable_group(pp);
1103                         }
1104                         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1105                                 LOG_MSG(4, checker_msg);
1106                                 /*
1107                                  * double the next check delay.
1108                                  * max at conf->max_checkint
1109                                  */
1110                                 if (pp->checkint < (conf->max_checkint / 2))
1111                                         pp->checkint = 2 * pp->checkint;
1112                                 else
1113                                         pp->checkint = conf->max_checkint;
1114
1115                                 pp->tick = pp->checkint;
1116                                 condlog(4, "%s: delay next check %is",
1117                                                 pp->dev_t, pp->tick);
1118
1119                         }
1120                         pp->state = newstate;
1121                 }
1122                 defered_failback_tick(allpaths->mpvec);
1123
1124                 if (count)
1125                         count--;
1126                 else {
1127                         condlog(4, "map garbage collection");
1128                         mpvec_garbage_collector(allpaths);
1129                         count = MAPGCINT;
1130                 }
1131                 
1132                 pthread_cleanup_pop(1);
1133                 sleep(1);
1134         }
1135         return NULL;
1136 }
1137
1138 static struct paths *
1139 init_paths (void)
1140 {
1141         struct paths * allpaths;
1142
1143         allpaths = (struct paths *)MALLOC(sizeof(struct paths));
1144
1145         if (!allpaths)
1146                 return NULL;
1147
1148         allpaths->lock = 
1149                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1150
1151         if (!allpaths->lock)
1152                 goto out;
1153
1154         allpaths->pathvec = vector_alloc();
1155
1156         if (!allpaths->pathvec)
1157                 goto out1;
1158                 
1159         allpaths->mpvec = vector_alloc();
1160
1161         if (!allpaths->mpvec)
1162                 goto out2;
1163         
1164         pthread_mutex_init(allpaths->lock, NULL);
1165
1166         return allpaths;
1167
1168 out2:
1169         vector_free(allpaths->pathvec);
1170 out1:
1171         FREE(allpaths->lock);
1172 out:
1173         FREE(allpaths);
1174         condlog(0, "failed to init paths");
1175         return NULL;
1176 }
1177
1178 static void *
1179 signal_set(int signo, void (*func) (int))
1180 {
1181         int r;
1182         struct sigaction sig;
1183         struct sigaction osig;
1184
1185         sig.sa_handler = func;
1186         sigemptyset(&sig.sa_mask);
1187         sig.sa_flags = 0;
1188
1189         r = sigaction(signo, &sig, &osig);
1190
1191         if (r < 0)
1192                 return (SIG_ERR);
1193         else
1194                 return (osig.sa_handler);
1195 }
1196
/*
 * SIGHUP handler.
 *
 * Logs the reception of the signal. In _DEBUG_ builds it additionally
 * calls dbg_free_final(NULL).
 *
 * sig: signal number, unused.
 */
static void
sighup (int sig)
{
	(void)sig;

	condlog(2, "SIGHUP received");

#ifdef _DEBUG_
	dbg_free_final(NULL);
#endif
}
1206
/*
 * Termination signal handler: requests daemon shutdown by calling
 * exit_daemon() with a zero status.
 *
 * sig: signal number, unused.
 */
static void
sigend (int sig)
{
	(void)sig;

	exit_daemon(0);
}
1212
1213 static void
1214 signal_init(void)
1215 {
1216         signal_set(SIGHUP, sighup);
1217         signal_set(SIGINT, sigend);
1218         signal_set(SIGTERM, sigend);
1219         signal_set(SIGKILL, sigend);
1220 }
1221
/*
 * Ask the kernel to run the calling process under the SCHED_RR policy
 * at priority 99.
 *
 * This is best effort: a failure is only logged and the daemon keeps
 * running with its current scheduling policy.
 */
static void
setscheduler (void)
{
	/* C99 designated initializer (the GNU "field: value" form is obsolete) */
	const struct sched_param rr_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler(0, SCHED_RR, &rr_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1236
/*
 * Write val, formatted as a decimal integer, to /proc/self/oom_adj.
 *
 * Best effort: if the file cannot be opened for writing the function
 * returns without doing anything, and write errors are not reported.
 */
static void
set_oom_adj (int val)
{
	FILE *oom = fopen("/proc/self/oom_adj", "w");

	if (oom) {
		fprintf(oom, "%i", val);
		fclose(oom);
	}
}
1250         
1251 static int
1252 child (void * param)
1253 {
1254         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1255         pthread_attr_t attr;
1256         struct paths * allpaths;
1257
1258         mlockall(MCL_CURRENT | MCL_FUTURE);
1259
1260         if (logsink)
1261                 log_thread_start();
1262
1263         condlog(2, "--------start up--------");
1264         condlog(2, "read " DEFAULT_CONFIGFILE);
1265
1266         if (load_config(DEFAULT_CONFIGFILE))
1267                 exit(1);
1268
1269         setlogmask(LOG_UPTO(conf->verbosity + 3));
1270
1271         /*
1272          * fill the voids left in the config file
1273          */
1274         if (!conf->checkint) {
1275                 conf->checkint = CHECKINT;
1276                 conf->max_checkint = MAX_CHECKINT;
1277         }
1278
1279         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1280                 if (logsink)
1281                         log_thread_stop();
1282
1283                 exit(1);
1284         }
1285         signal_init();
1286         setscheduler();
1287         set_oom_adj(-17);
1288         allpaths = init_paths();
1289
1290         if (!allpaths)
1291                 exit(1);
1292
1293         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1294                 condlog(0, "can not find sysfs mount point");
1295                 exit(1);
1296         }
1297
1298         /*
1299          * fetch paths and multipaths lists
1300          * no paths and/or no multipaths are valid scenarii
1301          * vectors maintenance will be driven by events
1302          */
1303         path_discovery(allpaths->pathvec, conf, DI_SYSFS | DI_WWID);
1304         get_dm_mpvec(allpaths);
1305
1306         /*
1307          * start threads
1308          */
1309         pthread_attr_init(&attr);
1310         pthread_attr_setstacksize(&attr, 64 * 1024);
1311         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
1312         
1313         pthread_create(&check_thr, &attr, checkerloop, allpaths);
1314         pthread_create(&uevent_thr, &attr, ueventloop, allpaths);
1315         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, allpaths);
1316
1317         pthread_cond_wait(&exit_cond, &exit_mutex);
1318
1319         /*
1320          * exit path
1321          */
1322         lock(allpaths->lock);
1323         remove_maps(allpaths);
1324         free_pathvec(allpaths->pathvec, FREE_PATHS);
1325
1326         pthread_cancel(check_thr);
1327         pthread_cancel(uevent_thr);
1328         pthread_cancel(uxlsnr_thr);
1329
1330         free_keys(keys);
1331         free_handlers(handlers);
1332         free_polls();
1333
1334         unlock(allpaths->lock);
1335         pthread_mutex_destroy(allpaths->lock);
1336         FREE(allpaths->lock);
1337         FREE(allpaths);
1338         free_config(conf);
1339
1340         if (logsink)
1341                 log_thread_stop();
1342
1343 #ifdef _DEBUG_
1344         dbg_free_final(NULL);
1345 #endif
1346
1347         exit(0);
1348 }
1349
1350 int
1351 main (int argc, char *argv[])
1352 {
1353         extern char *optarg;
1354         extern int optind;
1355         int arg;
1356         int err;
1357         
1358         logsink = 1;
1359
1360         if (getuid() != 0) {
1361                 fprintf(stderr, "need to be root\n");
1362                 exit(1);
1363         }
1364
1365         /* make sure we don't lock any path */
1366         chdir("/");
1367         umask(umask(077) | 022);
1368
1369         conf = alloc_config();
1370
1371         if (!conf)
1372                 exit(1);
1373
1374         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1375         switch(arg) {
1376                 case 'd':
1377                         logsink = 0;
1378                         //debug=1; /* ### comment me out ### */
1379                         break;
1380                 case 'v':
1381                         if (sizeof(optarg) > sizeof(char *) ||
1382                             !isdigit(optarg[0]))
1383                                 exit(1);
1384
1385                         conf->verbosity = atoi(optarg);
1386                         break;
1387                 case 'k':
1388                         uxclnt(optarg);
1389                         exit(0);
1390                 default:
1391                         ;
1392                 }
1393         }
1394
1395         err = fork();
1396         
1397         if (err < 0)
1398                 /* error */
1399                 exit(1);
1400         else if (err > 0)
1401                 /* parent dies */
1402                 exit(0);
1403         else
1404                 /* child lives */
1405                 return (child(NULL));
1406 }