[multipathd] add "sg $map $pgindex" CLI command
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 #include <string.h>
2 #include <pthread.h>
3 #include <stdio.h>
4 #include <unistd.h>
5 #include <linux/unistd.h>
6 #include <stdlib.h>
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <fcntl.h>
10 #include <libdevmapper.h>
11 #include <signal.h>
12 #include <wait.h>
13 #include <sched.h>
14 #include <errno.h>
15 #include <sys/mount.h>
16 #include <sys/mman.h>
17
18 /*
19  * libsysfs
20  */
21 #include <sysfs/libsysfs.h>
22 #include <sysfs/dlist.h>
23
24 /*
25  * libcheckers
26  */
27 #include <checkers.h>
28 #include <path_state.h>
29
30 /*
31  * libmultipath
32  */
33 #include <parser.h>
34 #include <vector.h>
35 #include <memory.h>
36 #include <config.h>
37 #include <callout.h>
38 #include <util.h>
39 #include <blacklist.h>
40 #include <hwtable.h>
41 #include <defaults.h>
42 #include <structs.h>
43 #include <dmparser.h>
44 #include <devmapper.h>
45 #include <dict.h>
46 #include <discovery.h>
47 #include <debug.h>
48 #include <propsel.h>
49 #include <uevent.h>
50 #include <switchgroup.h>
51
52 #include "main.h"
53 #include "copy.h"
54 #include "clone_platform.h"
55 #include "pidfile.h"
56 #include "uxlsnr.h"
57 #include "uxclnt.h"
58
/* not referenced in the visible part of this file */
#define FILE_NAME_SIZE 256
/* not referenced in the visible part of this file */
#define CMDSIZE 160
/* size of the reply buffers allocated by show_paths() and show_maps() */
#define MAX_REPLY_LEN 1000

/* ramfs mount point: mounted in prepare_namespace(), unmounted in exit_daemon() */
#define CALLOUT_DIR "/var/cache/multipathd"
64
/*
 * Log a pending checker message, then clear the message buffer.
 *
 * a: message buffer of MAX_CHECKER_MSG_SIZE bytes (zeroed after logging)
 * b: string printed in front of the message
 *
 * Nothing happens when the buffer holds an empty string.
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, including inside an unbraced if / else.
 */
#define LOG_MSG(a,b) \
        do { \
                if (strlen(a)) { \
                        condlog(1, "%s: %s", b, a); \
                        memset(a, 0, MAX_CHECKER_MSG_SIZE); \
                } \
        } while (0)
70
/*
 * lock() / unlock() wrap pthread_mutex_lock() / pthread_mutex_unlock().
 *
 * When LCKDBG is defined, each call is first traced on stderr with the
 * file, function and line of the call site. The debug variants are
 * wrapped in do { } while (0) so that both the trace and the mutex
 * operation stay under the control of an unbraced if / else.
 */
#ifdef LCKDBG
#define lock(a) \
        do { \
                fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
                pthread_mutex_lock(a); \
        } while (0)
#define unlock(a) \
        do { \
                fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
                pthread_mutex_unlock(a); \
        } while (0)
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#endif
82
83 /*
84  * structs
85  */
/*
 * State shared by the uevent, unix socket, checker and waiter code :
 * each of them receives a pointer to this structure.
 */
struct paths {
        pthread_mutex_t *lock;  /* taken with lock() / unlock() around vector accesses */
        vector pathvec;         /* iterated as struct path * entries */
        vector mpvec;           /* iterated as struct multipath * entries */
};
91
/*
 * Per-map context of a DM event waiter thread, see start_waiter_thread().
 */
struct event_thread {
        pthread_t *thread;              /* allocated in alloc_waiter(), freed in free_waiter() */
        int event_nr;                   /* last event number seen, see waiteventloop() */
        char mapname[WWID_SIZE];        /* copy of the map alias */
        struct paths *allpaths;         /* handed to update_multipath() upon event */
};
98
99 static void *
100 alloc_waiter (void)
101 {
102
103         struct event_thread * wp;
104
105         wp = MALLOC(sizeof(struct event_thread));
106
107         if (!wp)
108                 return NULL;
109
110         wp->thread = MALLOC(sizeof(pthread_t));
111
112         if (!wp->thread)
113                 goto out;
114                 
115         return wp;
116
117 out:
118         free(wp);
119         condlog(0, "failed to alloc waiter");
120         return NULL;
121 }
122
123 static void
124 set_paths_owner (struct paths * allpaths, struct multipath * mpp)
125 {
126         int i;
127         struct path * pp;
128
129         if (!mpp)
130                 return;
131
132         vector_foreach_slot (allpaths->pathvec, pp, i) {
133                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
134                         condlog(4, "%s ownership set", pp->dev_t);
135                         pp->mpp = mpp;
136                 }
137         }
138 }
139
140 static void
141 unset_paths_owner (struct paths * allpaths, struct multipath * mpp)
142 {
143         int i;
144         struct path * pp;
145
146         vector_foreach_slot (allpaths->pathvec, pp, i) {
147                 if (pp->mpp == mpp) {
148                         condlog(4, "%s is orphaned", pp->dev_t);
149                         pp->mpp = NULL;
150                 }
151         }
152 }
153
154 static int
155 update_multipath_table (struct multipath *mpp, vector pathvec)
156 {
157         if (!mpp)
158                 return 1;
159
160         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
161                 return 1;
162
163         if(disassemble_map(pathvec, mpp->params, mpp))
164                 return 1;
165
166         return 0;
167 }
168
169 static int
170 update_multipath_status (struct multipath *mpp)
171 {
172         if (!mpp)
173                 return 1;
174
175         if(dm_get_status(mpp->alias, mpp->status))
176                 return 1;
177
178         if (disassemble_status(mpp->status, mpp))
179                 return 1;
180
181         return 0;
182 }
183
184 static int
185 update_multipath_strings (struct multipath *mpp, vector pathvec)
186 {
187         if (update_multipath_table(mpp, pathvec))
188                 return 1;
189
190         if (update_multipath_status(mpp))
191                 return 1;
192
193         return 0;
194 }
195
196 static int
197 setup_multipath (struct paths * allpaths, struct multipath * mpp)
198 {
199         char * wwid;
200
201         wwid = get_mpe_wwid(mpp->alias);
202
203         if (wwid) {
204                 strncpy(mpp->wwid, wwid, WWID_SIZE);
205                 wwid = NULL;
206         } else
207                 strncpy(mpp->wwid, mpp->alias, WWID_SIZE);
208
209         condlog(4, "discovered map %s", mpp->alias);
210
211         if (update_multipath_strings(mpp, allpaths->pathvec))
212                 goto out;
213
214         set_paths_owner(allpaths, mpp);
215         mpp->mpe = find_mpe(mpp->wwid);
216         select_pgfailback(mpp);
217
218         return 0;
219 out:
220         free_multipath(mpp, KEEP_PATHS);
221         condlog(0, "failed to setup multipath");
222         return 1;
223 }
224
/*
 * Handler of the "sg $map $pgindex" CLI command.
 *
 * str holds the two arguments : the map name, then the path group index.
 * Both words are extracted with get_word(), the index is converted with
 * atoi() and the switch is requested through dm_switchgroup().
 * Nothing is done when one of the two words is missing.
 */
static void
switch_to_pathgroup (char * str)
{
        char * map;
        char * pgindex;
        char * cursor = str;

        cursor += get_word(cursor, &map);

        if (!map)
                return;

        cursor += get_word(cursor, &pgindex);

        if (pgindex) {
                dm_switchgroup(map, atoi(pgindex));
                FREE(pgindex);
        }
        FREE(map);
}
249         
250 static void
251 switch_pathgroup (struct multipath * mpp)
252 {
253         struct pathgroup * pgp;
254         struct path * pp;
255         int i, j;
256         
257         if (!mpp || mpp->pgfailback == FAILBACK_MANUAL)
258                 return;
259         /*
260          * Refresh path priority values
261          */
262         vector_foreach_slot (mpp->pg, pgp, i)
263                 vector_foreach_slot (pgp->paths, pp, j)
264                         pathinfo(pp, conf->hwtable, DI_PRIO);
265
266         select_path_group(mpp); /* sets mpp->nextpg */
267         pgp = VECTOR_SLOT(mpp->pg, mpp->nextpg - 1);
268         
269         if (pgp && pgp->status != PGSTATE_ACTIVE) {
270                 dm_switchgroup(mpp->alias, mpp->nextpg);
271                 condlog(2, "%s: switch to path group #%i",
272                          mpp->alias, mpp->nextpg);
273         }
274 }
275
276 static int
277 update_multipath (struct paths *allpaths, char *mapname)
278 {
279         struct multipath *mpp;
280         struct pathgroup  *pgp;
281         struct path *pp;
282         int i, j;
283         int r = 1;
284
285         lock(allpaths->lock);
286         mpp = find_mp(allpaths->mpvec, mapname);
287
288         if (!mpp)
289                 goto out;
290
291         free_pgvec(mpp->pg, KEEP_PATHS);
292         mpp->pg = NULL;
293
294         setup_multipath(allpaths, mpp);
295
296         /*
297          * compare checkers states with DM states
298          */
299         vector_foreach_slot (mpp->pg, pgp, i) {
300                 vector_foreach_slot (pgp->paths, pp, j) {
301                         if (pp->dmstate != PSTATE_FAILED)
302                                 continue;
303
304                         if (pp->state != PATH_DOWN) {
305                                 condlog(2, "%s: mark as failed", pp->dev_t);
306                                 pp->state = PATH_DOWN;
307
308                                 /*
309                                  * if opportune,
310                                  * schedule the next check earlier
311                                  */
312                                 if (pp->tick > conf->checkint)
313                                         pp->tick = conf->checkint;
314                         }
315                 }
316         }
317         r = 0;
318 out:
319         unlock(allpaths->lock);
320
321         if (r)
322                 condlog(0, "failed to update multipath");
323
324         return r;
325 }
326
327 /*
328  * returns the reschedule delay
329  * negative means *stop*
330  */
331 static int
332 waiteventloop (struct event_thread * waiter)
333 {
334         struct dm_task *dmt;
335         int event_nr;
336         int r = 1; /* upon problem reschedule 1s later */
337
338         if (!waiter->event_nr)
339                 waiter->event_nr = dm_geteventnr(waiter->mapname);
340
341         if (!(dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
342                 goto out;
343
344         if (!dm_task_set_name(dmt, waiter->mapname))
345                 goto out;
346
347         if (waiter->event_nr && !dm_task_set_event_nr(dmt, waiter->event_nr))
348                 goto out;
349
350         dm_task_no_open_count(dmt);
351
352         dm_task_run(dmt);
353
354         waiter->event_nr++;
355
356         /*
357          * upon event ...
358          */
359         while (1) {
360                 condlog(2, "devmap event (%i) on %s",
361                                 waiter->event_nr, waiter->mapname);
362
363                 /*
364                  * event might be :
365                  *
366                  * 1) a table reload, which means our mpp structure is
367                  *    obsolete : refresh it through update_multipath()
368                  * 2) a path failed by DM : mark as such through
369                  *    update_multipath()
370                  * 3) map has gone away : stop the thread.
371                  * 4) a path reinstate : nothing to do
372                  * 5) a switch group : nothing to do
373                  */
374                 if (update_multipath(waiter->allpaths, waiter->mapname)) {
375                         r = -1; /* stop the thread */
376                         goto out;
377                 }
378                 event_nr = dm_geteventnr(waiter->mapname);
379
380                 if (waiter->event_nr == event_nr)
381                         break;
382
383                 waiter->event_nr = event_nr;
384         }
385
386 out:
387         dm_task_destroy(dmt);
388         return r;
389 }
390
391 static void *
392 waitevent (void * et)
393 {
394         int r;
395         struct event_thread *waiter;
396
397         mlockall(MCL_CURRENT | MCL_FUTURE);
398
399         waiter = (struct event_thread *)et;
400         pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
401
402         while (1) {
403                 r = waiteventloop(waiter);
404
405                 if (r < 0)
406                         break;
407
408                 sleep(r);
409         }
410
411         pthread_exit(waiter->thread);
412
413         return NULL;
414 }
415
416 static void
417 free_waiter (struct event_thread * wp)
418 {
419         free(wp->thread);
420         free(wp);
421 }
422
423 static int
424 stop_waiter_thread (struct multipath * mpp, struct paths * allpaths)
425 {
426         struct event_thread * wp;
427
428         if (!mpp)
429                 return 0;
430
431         wp = (struct event_thread *)mpp->waiter;
432
433         if (!wp)
434                 return 1;
435
436         condlog(2, "reap event checker : %s", wp->mapname);
437         pthread_cancel(*wp->thread);
438         free_waiter(wp);
439
440         return 0;
441 }
442
443 static int
444 start_waiter_thread (struct multipath * mpp, struct paths * allpaths)
445 {
446         pthread_attr_t attr;
447         struct event_thread * wp;
448
449         if (!mpp)
450                 return 0;
451
452         if (pthread_attr_init(&attr))
453                 return 1;
454
455         pthread_attr_setstacksize(&attr, 32 * 1024);
456         wp = alloc_waiter();
457
458         if (!wp)
459                 return 1;
460
461         mpp->waiter = (void *)wp;
462         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
463         wp->allpaths = allpaths;
464
465         if (pthread_create(wp->thread, &attr, waitevent, wp)) {
466                 condlog(0, "%s: cannot create event checker", wp->mapname);
467                 goto out;
468         }
469         condlog(2, "%s: event checker started", wp->mapname);
470
471         return 0;
472 out:
473         free_waiter(wp);
474         mpp->waiter = NULL;
475         condlog(0, "failed to start waiter thread");
476         return 1;
477 }
478
479 static void
480 remove_map (struct multipath * mpp, struct paths * allpaths)
481 {
482         int i;
483
484         stop_waiter_thread(mpp, allpaths);
485         i = find_slot(allpaths->mpvec, (void *)mpp);
486         vector_del_slot(allpaths->mpvec, i);
487         free_multipath(mpp, KEEP_PATHS);
488         unset_paths_owner(allpaths, mpp);
489 }
490
491 static int
492 uev_add_map (char * devname, struct paths * allpaths)
493 {
494         int major, minor;
495         char dev_t[BLK_DEV_SIZE];
496         char * buff;
497         struct multipath * mpp;
498
499         if (sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE))
500                 return 1;
501
502         if (sscanf(dev_t, "%d:%d", &major, &minor) != 2)
503                 return 1;
504
505         buff = dm_mapname(major, minor, "multipath");
506                 
507         if (!buff)
508                 return 1;
509         
510         mpp = find_mp(allpaths->mpvec, buff);
511
512         if (mpp) {
513                 /*
514                  * devmap already in mpvec
515                  * but remove DM uevent are somewhet unreliable
516                  * so for now consider safer to remove and re-add the map
517                  */
518                 condlog(2, "%s: remove dead config", mpp->alias);
519                 remove_map(mpp, allpaths);
520                 mpp = NULL;
521         }
522         if (!mpp) {
523                 mpp = alloc_multipath();
524
525                 if (!mpp)
526                         return 1;
527
528                 mpp->minor = minor;
529                 mpp->alias = MALLOC(strlen(buff) + 1);
530
531                 if (!mpp->alias)
532                         goto out;
533
534                 strncat(mpp->alias, buff, strlen(buff));
535
536                 dm_get_map(mpp->alias, &mpp->size, mpp->params);
537                 dm_get_status(mpp->alias, mpp->status);
538
539                 if (setup_multipath(allpaths, mpp))
540                         return 1; /* mpp freed in setup_multipath */
541
542                 if (!vector_alloc_slot(allpaths->mpvec))
543                         goto out;
544
545                 vector_set_slot(allpaths->mpvec, mpp);
546                 set_paths_owner(allpaths, mpp);
547
548                 if (start_waiter_thread(mpp, allpaths))
549                         goto out;
550         }
551         return 0;
552 out:
553         free_multipath(mpp, KEEP_PATHS);
554         return 1;
555 }
556
557 static int
558 uev_remove_map (char * devname, struct paths * allpaths)
559 {
560         int minor;
561         struct multipath * mpp;
562
563         minor = atoi(devname + 3);
564         mpp = find_mp_by_minor(allpaths->mpvec, minor);
565
566         if (mpp)
567                 remove_map(mpp, allpaths);
568
569         return 0;
570 }
571
572 static int
573 uev_add_path (char * devname, struct paths * allpaths)
574 {
575         struct path * pp;
576
577         pp = find_path_by_dev(allpaths->pathvec, devname);
578
579         if (pp) {
580                 condlog(3, "%s: already in pathvec");
581                 return 0;
582         }
583         condlog(2, "add %s path checker", devname);
584         pp = store_pathinfo(allpaths->pathvec, conf->hwtable,
585                        devname, DI_SYSFS | DI_WWID);
586
587         if (!pp)
588                 return 1;
589
590         pp->mpp = find_mp_by_wwid(allpaths->mpvec, pp->wwid);
591
592         if (pp->mpp)
593                 condlog(4, "%s: ownership set to %s",
594                                 pp->dev_t, pp->mpp->alias);
595         else
596                 condlog(4, "%s: orphaned", pp->dev_t);
597
598         return 0;
599 }
600
601 static int
602 uev_remove_path (char * devname, struct paths * allpaths)
603 {
604         int i;
605         struct path * pp;
606
607         pp = find_path_by_dev(allpaths->pathvec, devname);
608
609         if (!pp) {
610                 condlog(3, "%s: not in pathvec");
611                 return 0;
612         }
613         condlog(2, "remove %s path checker", devname);
614         i = find_slot(allpaths->pathvec, (void *)pp);
615         vector_del_slot(allpaths->pathvec, i);
616         free_path(pp);
617
618         return 0;
619 }
620
621 static char *
622 show_paths (struct paths * allpaths)
623 {
624         int i, j, k;
625         struct path * pp;
626         char * c;
627         char * reply;
628
629         reply = MALLOC(MAX_REPLY_LEN);
630
631         if (!reply)
632                 return NULL;
633
634         c = reply;
635         c += sprintf(c, "\n");
636
637         vector_foreach_slot(allpaths->pathvec, pp, i) {
638                 c += sprintf(c, "%10s: ", pp->dev);
639
640                 if (!pp->mpp) {
641                         c += sprintf(c, "[orphan]\n");
642                         continue;
643                 }
644
645                 c += sprintf(c, "state %i, ", pp->state);
646
647                 j = pp->tick;
648                 k = pp->checkint - pp->tick;
649                 c += sprintf(c, "%3i/%3i ", j, pp->checkint);
650
651                 while (j-- > 0)
652                         c += sprintf(c, "X");
653
654
655                 while (k-- > 0)
656                         c += sprintf(c, ".");
657
658                 c += sprintf(c, "\n");
659         }
660
661         return reply;
662 }
663
664 static char *
665 show_maps (struct paths * allpaths)
666 {
667         int i, j, k;
668         struct multipath * mpp;
669         char * c;
670         char * reply;
671
672         reply = MALLOC(MAX_REPLY_LEN);
673
674         if (!reply)
675                 return NULL;
676
677         c = reply;
678         c += sprintf(c, "\n");
679
680         vector_foreach_slot(allpaths->mpvec, mpp, i) {
681                 c += sprintf(c, "%20s: ", mpp->alias);
682
683                 if (!mpp->failback_tick) {
684                         c += sprintf(c, "[no scheduled failback]\n");
685                         continue;
686                 }
687
688                 j = mpp->failback_tick;
689                 k = mpp->pgfailback - mpp->failback_tick;
690                 c += sprintf(c, "%3i/%3i ", j, mpp->pgfailback);
691
692                 while (j-- > 0)
693                         c += sprintf(c, "X");
694
695
696                 while (k-- > 0)
697                         c += sprintf(c, ".");
698
699                 c += sprintf(c, "\n");
700         }
701
702         return reply;
703 }
704
705 char *
706 uxsock_trigger (char * str, void * trigger_data)
707 {
708         struct paths * allpaths;
709         char * reply = NULL;
710
711         allpaths = (struct paths *)trigger_data;
712
713         lock(allpaths->lock);
714
715         if (*str == 'l' && *(str + 1) == 'p')
716                 reply = show_paths(allpaths);
717
718         else if (*str == 'l' && *(str + 1) == 'm')
719                 reply = show_maps(allpaths);
720
721         else if (*str == 'r' && *(str + 1) == 'p')
722                 uev_remove_path(str + 3, allpaths);
723
724         else if (*str == 'a' && *(str + 1) == 'p')
725                 uev_add_path(str + 3, allpaths);
726
727         else if (*str == 'r' && *(str + 1) == 'm')
728                 uev_remove_map(str + 3, allpaths);
729
730         else if (*str == 'a' && *(str + 1) == 'm')
731                 uev_add_map(str + 3, allpaths);
732
733         else if (*str == 's' && *(str + 1) == 'g')
734                 switch_to_pathgroup(str + 3);
735
736         if (!reply)
737                 asprintf(&reply, "ok\n");
738
739         unlock(allpaths->lock);
740
741         return reply;
742 }
743
744 int 
745 uev_trigger (struct uevent * uev, void * trigger_data)
746 {
747         int r = 0;
748         char devname[32];
749         struct paths * allpaths;
750
751         allpaths = (struct paths *)trigger_data;
752         lock(allpaths->lock);
753
754         if (strncmp(uev->devpath, "/block", 6))
755                 goto out;
756
757         basename(uev->devpath, devname);
758
759         /*
760          * device map add/remove event
761          */
762         if (!strncmp(devname, "dm-", 3)) {
763                 condlog(2, "%s %s devmap", uev->action, devname);
764
765                 if (!strncmp(uev->action, "add", 3)) {
766                         r = uev_add_map(devname, allpaths);
767                         goto out;
768                 }
769                 if (!strncmp(uev->action, "remove", 6)) {
770                         r = uev_remove_map(devname, allpaths);
771                         goto out;
772                 }
773                 goto out;
774         }
775         
776         /*
777          * path add/remove event
778          */
779         if (blacklist(conf->blist, devname))
780                 goto out;
781
782         if (!strncmp(uev->action, "add", 3)) {
783                 r = uev_add_path(devname, allpaths);
784                 goto out;
785         }
786         if (!strncmp(uev->action, "remove", 6)) {
787                 r = uev_remove_path(devname, allpaths);
788                 goto out;
789         }
790
791 out:
792         FREE(uev);
793         unlock(allpaths->lock);
794         return r;
795 }
796
797 static void *
798 ueventloop (void * ap)
799 {
800         uevent_listen(&uev_trigger, ap);
801
802         return NULL;
803 }
804
805 static void *
806 uxlsnrloop (void * ap)
807 {
808         uxsock_listen(&uxsock_trigger, ap);
809
810         return NULL;
811 }
812
813 static void
814 strvec_free (vector vec)
815 {
816         int i;
817         char * str;
818
819         vector_foreach_slot (vec, str, i)
820                 if (str)
821                         FREE(str);
822
823         vector_free(vec);
824 }
825
826 static int
827 exit_daemon (int status)
828 {
829         if (status != 0)
830                 fprintf(stderr, "bad exit status. see daemon.log\n");
831
832         condlog(3, "umount ramfs");
833         umount(CALLOUT_DIR);
834
835         condlog(3, "unlink pidfile");
836         unlink(DEFAULT_PIDFILE);
837
838         condlog(2, "--------shut down-------");
839         
840         if (logsink)
841                 log_thread_stop();
842
843         exit(status);
844 }
845
846 /*
847  * caller must have locked the path list before calling that function
848  */
849 static int
850 get_dm_mpvec (struct paths * allpaths)
851 {
852         int i;
853         struct multipath * mpp;
854
855         if (dm_get_maps(allpaths->mpvec, "multipath"))
856                 return 1;
857
858         vector_foreach_slot (allpaths->mpvec, mpp, i) {
859                 setup_multipath(allpaths, mpp);
860                 mpp->minor = dm_get_minor(mpp->alias);
861                 start_waiter_thread(mpp, allpaths);
862         }
863
864         return 0;
865 }
866
867 static void
868 fail_path (struct path * pp)
869 {
870         if (!pp->mpp)
871                 return;
872
873         condlog(2, "checker failed path %s in map %s",
874                  pp->dev_t, pp->mpp->alias);
875
876         dm_fail_path(pp->mpp->alias, pp->dev_t);
877 }
878
879 /*
880  * caller must have locked the path list before calling that function
881  */
882 static void
883 reinstate_path (struct path * pp)
884 {
885         if (pp->mpp) {
886                 if (dm_reinstate(pp->mpp->alias, pp->dev_t))
887                         condlog(0, "%s: reinstate failed", pp->dev_t);
888                 else
889                         condlog(2, "%s: reinstated", pp->dev_t);
890         }
891 }
892
893 static void
894 enable_group(struct path * pp)
895 {
896         struct pathgroup * pgp;
897
898         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
899         
900         if (pgp->status == PGSTATE_DISABLED) {
901                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
902                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
903         }
904 }
905
/*
 * Path checker thread, ap is the struct paths of the daemon.
 *
 * Once per second, with the path list locked, every owned path of
 * allpaths->pathvec has its tick decremented; a path whose tick has
 * reached 0 is checked with its checker function :
 *
 * - on a state change to PATH_DOWN or PATH_SHAKY, the path is failed
 *   in the device mapper and the scheduled failback is cancelled
 * - on any other state change, the path is reinstated, the map strings
 *   are refreshed and the failback is scheduled or done at once,
 *   depending on the map failback policy
 * - without state change, for a PATH_UP or PATH_GHOST path, the
 *   pending failback gets closer and the delay before the next check
 *   is doubled, up to conf->max_checkint
 *
 * The loop never ends.
 */
static void *
checkerloop (void *ap)
{
        struct paths *allpaths;
        struct path *pp;
        int i;
        int newstate;
        char checker_msg[MAX_CHECKER_MSG_SIZE];

        mlockall(MCL_CURRENT | MCL_FUTURE);

        memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
        allpaths = (struct paths *)ap;

        condlog(2, "path checkers start up");

        while (1) {
                lock(allpaths->lock);
                condlog(4, "tick");

                vector_foreach_slot (allpaths->pathvec, pp, i) {
                        /* orphan paths are not checked */
                        if (!pp->mpp)
                                continue;

                        if (pp->tick) {
                                /*
                                 * don't check this path yet
                                 */
                                pp->tick--;
                                continue;
                        }

                        /*
                         * provision a next check soonest,
                         * in case we exit abnormally from here
                         */
                        pp->tick = conf->checkint;
                        
                        /* no checker yet : try to select one */
                        if (!pp->checkfn) {
                                pathinfo(pp, conf->hwtable, DI_SYSFS);
                                select_checkfn(pp);
                        }

                        if (!pp->checkfn) {
                                condlog(0, "%s: checkfn is void", pp->dev);
                                continue;
                        }
                        newstate = pp->checkfn(pp->fd, checker_msg,
                                               &pp->checker_context);
                        
                        if (newstate != pp->state) {
                                pp->state = newstate;
                                /* the checker message is logged on state changes only */
                                LOG_MSG(checker_msg, pp->dev_t);

                                /*
                                 * upon state change, reset the checkint
                                 * to the shortest delay
                                 */
                                pp->checkint = conf->checkint;

                                if (newstate == PATH_DOWN ||
                                    newstate == PATH_SHAKY) {
                                        /*
                                         * proactively fail path in the DM
                                         */
                                        fail_path(pp);

                                        /*
                                         * cancel scheduled failback
                                         */
                                        pp->mpp->failback_tick = 0;

                                        continue;
                                }

                                /*
                                 * reinstate this path
                                 */
                                reinstate_path(pp);

                                /*
                                 * need to switch group ?
                                 */
                                update_multipath_strings(pp->mpp,
                                                         allpaths->pathvec);

                                /*
                                 * schedule deferred failback
                                 */
                                if (pp->mpp->pgfailback > 0)
                                        pp->mpp->failback_tick =
                                                pp->mpp->pgfailback;

                                if (pp->mpp->pgfailback == FAILBACK_IMMEDIATE)
                                        switch_pathgroup(pp->mpp);

                                /*
                                 * if at least one path is up in a group, and
                                 * the group is disabled, re-enable it
                                 */
                                if (newstate == PATH_UP)
                                        enable_group(pp);
                        }
                        else if (newstate == PATH_UP || newstate == PATH_GHOST) {
                                /*
                                 * PATH_UP for last two checks
                                 * deferred failback getting sooner
                                 */
                                if (pp->mpp->pgfailback > 0) {
                                        if (pp->mpp->failback_tick > 0) {
                                                pp->mpp->failback_tick--;

                                                /* countdown over : fail back now */
                                                if (!pp->mpp->failback_tick)
                                                        switch_pathgroup(pp->mpp);
                                        }
                                }
                                
                                /*
                                 * and double the next check delay.
                                 * max at conf->max_checkint
                                 */
                                if (pp->checkint < (conf->max_checkint / 2))
                                        pp->checkint = 2 * pp->checkint;
                                else
                                        pp->checkint = conf->max_checkint;

                                pp->tick = pp->checkint;
                                condlog(4, "%s: delay next check %is",
                                                pp->dev_t, pp->tick);

                        }
                        pp->state = newstate;
                }
                unlock(allpaths->lock);
                /* one loop iteration per second : ticks are seconds */
                sleep(1);
        }
        return NULL;
}
1044
1045 static struct paths *
1046 init_paths (void)
1047 {
1048         struct paths *allpaths;
1049
1050         allpaths = MALLOC(sizeof(struct paths));
1051
1052         if (!allpaths)
1053                 return NULL;
1054
1055         allpaths->lock = 
1056                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1057
1058         if (!allpaths->lock)
1059                 goto out;
1060
1061         allpaths->pathvec = vector_alloc();
1062
1063         if (!allpaths->pathvec)
1064                 goto out1;
1065                 
1066         allpaths->mpvec = vector_alloc();
1067
1068         if (!allpaths->mpvec)
1069                 goto out2;
1070         
1071         pthread_mutex_init(allpaths->lock, NULL);
1072
1073         return allpaths;
1074
1075 out2:
1076         vector_free(allpaths->pathvec);
1077 out1:
1078         FREE(allpaths->lock);
1079 out:
1080         FREE(allpaths);
1081         condlog(0, "failed to init paths");
1082         return NULL;
1083 }
1084
/*
 * This logic is all about keeping callouts working in case of a
 * system disk outage (think system over SAN).
 * It needs the clone syscall, so don't bother if it is not present
 * (Debian Woody): the whole function is compiled only when
 * CLONE_NEWNS is defined.
 */
#ifdef CLONE_NEWNS
/*
 * Build a ramfs holding a copy of every callout binary listed in
 * conf->binvec, then bind-mount it over /sbin, /bin and /tmp.
 *
 * Returns 0 on success and -1 when the mount point cannot be created,
 * a binary cannot be opened or stat'ed, or one of the mounts fails.
 * A failure to copy a binary into the ramfs calls exit_daemon(1).
 */
static int
prepare_namespace(void)
{
	mode_t mode = S_IRWXU;
	struct stat dirstat;
	struct stat binstat;
	char ramfs_args[64];
	char * bin;
	/* NOTE(review): the reason for the initial 10 bytes is not visible here */
	size_t size = 10;
	int i;
	int fd;

	/*
	 * create a temp mount point for ramfs
	 */
	if (stat(CALLOUT_DIR, &dirstat) < 0) {
		if (mkdir(CALLOUT_DIR, mode) < 0) {
			condlog(0, "cannot create " CALLOUT_DIR);
			return -1;
		}
		condlog(4, "created " CALLOUT_DIR);
	}

	/*
	 * compute the optimal ramdisk size: sum of the callout binary sizes
	 */
	vector_foreach_slot (conf->binvec, bin,i) {
		if ((fd = open(bin, O_RDONLY)) < 0) {
			condlog(0, "cannot open %s", bin);
			return -1;
		}
		if (fstat(fd, &binstat) < 0) {
			condlog(0, "cannot stat %s", bin);
			/* do not leak the descriptor on the error path */
			close(fd);
			return -1;
		}
		size += binstat.st_size;
		close(fd);
	}
	condlog(3, "ramfs maxsize is %u", (unsigned int) size);

	/*
	 * mount the ramfs
	 */
	if (safe_sprintf(ramfs_args, "maxsize=%u", (unsigned int) size)) {
		fprintf(stderr, "ramfs_args too small\n");
		return -1;
	}
	if (mount(NULL, CALLOUT_DIR, "ramfs", MS_SYNCHRONOUS, ramfs_args) < 0) {
		condlog(0, "cannot mount ramfs on " CALLOUT_DIR);
		return -1;
	}
	condlog(4, "mount ramfs on " CALLOUT_DIR);

	/*
	 * populate the ramfs with callout binaries
	 */
	vector_foreach_slot (conf->binvec, bin,i) {
		if (copytodir(bin, CALLOUT_DIR) < 0) {
			condlog(0, "cannot copy %s in ramfs", bin);
			exit_daemon(1);
		}
		condlog(4, "cp %s in ramfs", bin);
	}
	/* the list of binaries is not needed once they are copied */
	strvec_free(conf->binvec);

	/*
	 * bind the ramfs to :
	 * /sbin : default home of multipath ...
	 * /bin  : default home of scsi_id ...
	 * /tmp  : home of scsi_id temp files
	 */
	if (mount(CALLOUT_DIR, "/sbin", NULL, MS_BIND, NULL) < 0) {
		condlog(0, "cannot bind ramfs on /sbin");
		return -1;
	}
	condlog(4, "bind ramfs on /sbin");
	if (mount(CALLOUT_DIR, "/bin", NULL, MS_BIND, NULL) < 0) {
		condlog(0, "cannot bind ramfs on /bin");
		return -1;
	}
	condlog(4, "bind ramfs on /bin");
	if (mount(CALLOUT_DIR, "/tmp", NULL, MS_BIND, NULL) < 0) {
		condlog(0, "cannot bind ramfs on /tmp");
		return -1;
	}
	condlog(4, "bind ramfs on /tmp");

	return 0;
}
#endif
1184
/*
 * Install func as the handler for signo through sigaction(), with an
 * empty signal mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when sigaction()
 * fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction act;
	struct sigaction oldact;

	act.sa_handler = func;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(signo, &act, &oldact) < 0)
		return (SIG_ERR);

	return (oldact.sa_handler);
}
1203
/*
 * SIGHUP handler: log that the signal arrived. Builds with _DEBUG_
 * defined additionally call dbg_free_final(NULL).
 */
static void
sighup (int sig)
{
	condlog(2, "SIGHUP received");

#if defined(_DEBUG_)
	dbg_free_final(NULL);
#endif
}
1213
/*
 * Termination-signal handler: whatever signal was delivered, hand over
 * to exit_daemon() with status 0.
 */
static void
sigend (int sig)
{
	exit_daemon(0);
}
1219
1220 static void
1221 signal_init(void)
1222 {
1223         signal_set(SIGHUP, sighup);
1224         signal_set(SIGINT, sigend);
1225         signal_set(SIGTERM, sigend);
1226         signal_set(SIGKILL, sigend);
1227 }
1228
1229 static void
1230 setscheduler (void)
1231 {
1232         int res;
1233         static struct sched_param sched_param = {
1234                 sched_priority: 99
1235         };
1236
1237         res = sched_setscheduler (0, SCHED_RR, &sched_param);
1238
1239         if (res == -1)
1240                 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
1241         return;
1242 }
1243
/*
 * Write val, formatted as a decimal integer, to /proc/self/oom_adj.
 * Best effort: nothing happens when the file cannot be opened, and
 * write errors are not reported.
 */
static void
set_oom_adj (int val)
{
	FILE *oom_file = fopen("/proc/self/oom_adj", "w");

	if (oom_file != NULL) {
		fprintf(oom_file, "%i", val);
		fclose(oom_file);
	}
}
1257         
1258 static int
1259 child (void * param)
1260 {
1261         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1262         pthread_attr_t attr;
1263         struct paths * allpaths;
1264
1265         mlockall(MCL_CURRENT | MCL_FUTURE);
1266
1267         if (logsink)
1268                 log_thread_start();
1269
1270         condlog(2, "--------start up--------");
1271         condlog(2, "read " DEFAULT_CONFIGFILE);
1272
1273         if (load_config(DEFAULT_CONFIGFILE))
1274                 exit(1);
1275
1276         setlogmask(LOG_UPTO(conf->verbosity + 3));
1277
1278         /*
1279          * fill the voids left in the config file
1280          */
1281         if (!conf->binvec) {
1282                 conf->binvec = vector_alloc();
1283                 push_callout("/sbin/scsi_id");
1284         }
1285         if (!conf->multipath) {
1286                 conf->multipath = MULTIPATH;
1287                 push_callout(conf->multipath);
1288         }
1289         if (!conf->checkint) {
1290                 conf->checkint = CHECKINT;
1291                 conf->max_checkint = MAX_CHECKINT;
1292         }
1293
1294         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1295                 if (logsink)
1296                         log_thread_stop();
1297
1298                 exit(1);
1299         }
1300         signal_init();
1301         setscheduler();
1302         set_oom_adj(-17);
1303         allpaths = init_paths();
1304
1305         if (!allpaths)
1306                 exit(1);
1307
1308         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1309                 condlog(0, "can not find sysfs mount point");
1310                 exit(1);
1311         }
1312
1313 #ifdef CLONE_NEWNS
1314         if (prepare_namespace() < 0) {
1315                 condlog(0, "cannot prepare namespace");
1316                 exit_daemon(1);
1317         }
1318 #endif
1319
1320         /*
1321          * fetch paths and multipaths lists
1322          * no paths and/or no multipaths are valid scenarii
1323          * vectors maintenance will be driven by events
1324          */
1325         path_discovery(allpaths->pathvec, conf, DI_SYSFS | DI_WWID);
1326         get_dm_mpvec(allpaths);
1327
1328         /*
1329          * start threads
1330          */
1331         pthread_attr_init(&attr);
1332         pthread_attr_setstacksize(&attr, 64 * 1024);
1333         
1334         pthread_create(&check_thr, &attr, checkerloop, allpaths);
1335         pthread_create(&uevent_thr, &attr, ueventloop, allpaths);
1336         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, allpaths);
1337         pthread_join(check_thr, NULL);
1338         pthread_join(uevent_thr, NULL);
1339         pthread_join(uxlsnr_thr, NULL);
1340
1341         return 0;
1342 }
1343
1344 int
1345 main (int argc, char *argv[])
1346 {
1347         extern char *optarg;
1348         extern int optind;
1349         int arg;
1350         int err;
1351         void * child_stack;
1352         
1353         logsink = 1;
1354
1355         if (getuid() != 0) {
1356                 fprintf(stderr, "need to be root\n");
1357                 exit(1);
1358         }
1359
1360         /* make sure we don't lock any path */
1361         chdir("/");
1362         umask(umask(077) | 022);
1363
1364         child_stack = (void *)malloc(CHILD_STACK_SIZE);
1365
1366         if (!child_stack)
1367                 exit(1);
1368
1369         conf = alloc_config();
1370
1371         if (!conf)
1372                 exit(1);
1373
1374         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1375         switch(arg) {
1376                 case 'd':
1377                         logsink = 0;
1378                         break;
1379                 case 'v':
1380                         if (sizeof(optarg) > sizeof(char *) ||
1381                             !isdigit(optarg[0]))
1382                                 exit(1);
1383
1384                         conf->verbosity = atoi(optarg);
1385                         break;
1386                 case 'k':
1387                         uxclnt(optarg);
1388                         exit(0);
1389                 default:
1390                         ;
1391                 }
1392         }
1393
1394 #ifdef CLONE_NEWNS      /* recent systems have clone() */
1395
1396 #    if defined(__hppa__) || defined(__powerpc64__)
1397         err = clone(child, child_stack, CLONE_NEWNS, NULL);
1398 #    elif defined(__ia64__)
1399         err = clone2(child, child_stack,
1400                      CHILD_STACK_SIZE, CLONE_NEWNS, NULL,
1401                      NULL, NULL, NULL);
1402 #    else
1403         err = clone(child, child_stack + CHILD_STACK_SIZE, CLONE_NEWNS, NULL);
1404 #    endif
1405         if (err < 0)
1406                 exit (1);
1407
1408         exit(0);
1409 #else                   /* older system fallback to fork() */
1410         err = fork();
1411         
1412         if (err < 0)
1413                 exit (1);
1414
1415         return (child(child_stack));
1416 #endif
1417
1418 }