[multipathd] enable the deferred failback feature
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 #include <string.h>
2 #include <pthread.h>
3 #include <stdio.h>
4 #include <unistd.h>
5 #include <linux/unistd.h>
6 #include <stdlib.h>
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <fcntl.h>
10 #include <libdevmapper.h>
11 #include <signal.h>
12 #include <wait.h>
13 #include <sched.h>
14 #include <errno.h>
15 #include <sys/mount.h>
16 #include <sys/mman.h>
17
18 /*
19  * libsysfs
20  */
21 #include <sysfs/libsysfs.h>
22 #include <sysfs/dlist.h>
23
24 /*
25  * libcheckers
26  */
27 #include <checkers.h>
28 #include <path_state.h>
29
30 /*
31  * libmultipath
32  */
33 #include <parser.h>
34 #include <vector.h>
35 #include <memory.h>
36 #include <config.h>
37 #include <callout.h>
38 #include <util.h>
39 #include <blacklist.h>
40 #include <hwtable.h>
41 #include <defaults.h>
42 #include <structs.h>
43 #include <dmparser.h>
44 #include <devmapper.h>
45 #include <dict.h>
46 #include <discovery.h>
47 #include <debug.h>
48 #include <propsel.h>
49 #include <uevent.h>
50 #include <switchgroup.h>
51
52 #include "main.h"
53 #include "copy.h"
54 #include "clone_platform.h"
55 #include "pidfile.h"
56
57 #define FILE_NAME_SIZE 256
58 #define CMDSIZE 160
59
60 #define CALLOUT_DIR "/var/cache/multipathd"
61
/*
 * Log the pending checker message held in buffer (a) for device (b) at
 * warning level, then clear the buffer so the message is reported once.
 *
 * (a) must be a writable buffer of at least MAX_CHECKER_MSG_SIZE bytes.
 * It is evaluated several times, so pass a plain lvalue.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct inside unbraced if/else bodies.
 */
#define LOG_MSG(a,b) \
        do { \
                if (strlen(a)) { \
                        log_safe(LOG_WARNING, "%s: %s", (b), (a)); \
                        memset((a), 0, MAX_CHECKER_MSG_SIZE); \
                } \
        } while (0)
67
/*
 * Mutex helpers.
 *
 * When LCKDBG is defined, every lock/unlock call site is traced on stderr
 * before the mutex operation is performed.  The tracing variants are
 * wrapped in do { } while (0) so they expand to exactly one statement and
 * cannot be split by an unbraced if/else.
 */
#ifdef LCKDBG
#define lock(a) \
        do { \
                fprintf(stderr, "%s:%s(%i) lock %p\n", \
                        __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
                pthread_mutex_lock(a); \
        } while (0)
#define unlock(a) \
        do { \
                fprintf(stderr, "%s:%s(%i) unlock %p\n", \
                        __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
                pthread_mutex_unlock(a); \
        } while (0)
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#endif
79
80 /*
81  * structs
82  */
83 struct paths {
84         pthread_mutex_t *lock;
85         vector pathvec;
86         vector mpvec;
87 };
88
89 struct event_thread {
90         pthread_t *thread;
91         int event_nr;
92         char mapname[WWID_SIZE];
93         struct paths *allpaths;
94 };
95
96 static void *
97 alloc_waiter (void)
98 {
99
100         struct event_thread * wp;
101
102         wp = MALLOC(sizeof(struct event_thread));
103
104         if (!wp)
105                 return NULL;
106
107         wp->thread = MALLOC(sizeof(pthread_t));
108
109         if (!wp->thread)
110                 goto out;
111                 
112         return wp;
113
114 out:
115         free(wp);
116         log_safe(LOG_ERR, "failed to alloc waiter");
117         return NULL;
118 }
119
120 static void
121 set_paths_owner (struct paths * allpaths, struct multipath * mpp)
122 {
123         int i;
124         struct path * pp;
125
126         vector_foreach_slot (allpaths->pathvec, pp, i) {
127                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
128                         log_safe(LOG_DEBUG, "%s ownership set",
129                                  pp->dev_t);
130                         pp->mpp = mpp;
131                 }
132         }
133 }
134
135 static int
136 update_multipath_table (struct multipath *mpp, vector pathvec)
137 {
138         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
139                 return 1;
140
141         if(disassemble_map(pathvec, mpp->params, mpp))
142                 return 1;
143
144         return 0;
145 }
146
147 static int
148 update_multipath_status (struct multipath *mpp)
149 {
150         if(dm_get_status(mpp->alias, mpp->status))
151                 return 1;
152
153         if (disassemble_status(mpp->status, mpp))
154                 return 1;
155
156         return 0;
157 }
158
159 static int
160 update_multipath_strings (struct multipath *mpp, vector pathvec)
161 {
162         if (update_multipath_table(mpp, pathvec))
163                 return 1;
164
165         if (update_multipath_status(mpp))
166                 return 1;
167
168         return 0;
169 }
170
171 static int
172 setup_multipath (struct paths * allpaths, struct multipath * mpp)
173 {
174         char * wwid;
175
176         wwid = get_mpe_wwid(mpp->alias);
177
178         if (wwid) {
179                 strncpy(mpp->wwid, wwid, WWID_SIZE);
180                 wwid = NULL;
181         } else
182                 strncpy(mpp->wwid, mpp->alias, WWID_SIZE);
183
184         log_safe(LOG_DEBUG, "discovered map %s", mpp->alias);
185
186         if (update_multipath_strings(mpp, allpaths->pathvec))
187                 goto out;
188
189         set_paths_owner(allpaths, mpp);
190         mpp->mpe = find_mpe(mpp->wwid);
191         select_pgfailback(mpp);
192
193         return 0;
194 out:
195         free_multipath(mpp, KEEP_PATHS);
196         log_safe(LOG_ERR, "failed to setup multipath");
197         return 1;
198 }
199
200 static void
201 switch_pathgroup (struct multipath * mpp)
202 {
203         struct pathgroup * pgp;
204         struct path * pp;
205         int i, j;
206         
207         if (!mpp || mpp->pgfailback == FAILBACK_MANUAL)
208                 return;
209         /*
210          * Refresh path priority values
211          */
212         vector_foreach_slot (mpp->pg, pgp, i)
213                 vector_foreach_slot (pgp->paths, pp, j)
214                         pathinfo(pp, conf->hwtable, DI_PRIO);
215
216         select_path_group(mpp); /* sets mpp->nextpg */
217         pgp = VECTOR_SLOT(mpp->pg, mpp->nextpg - 1);
218         
219         if (pgp && pgp->status != PGSTATE_ACTIVE) {
220                 dm_switchgroup(mpp->alias, mpp->nextpg);
221                 log_safe(LOG_NOTICE, "%s: switch to path group #%i",
222                          mpp->alias, mpp->nextpg);
223         }
224 }
225
226 static int
227 update_multipath (struct paths *allpaths, char *mapname)
228 {
229         struct multipath *mpp;
230         struct pathgroup  *pgp;
231         struct path *pp;
232         int i, j;
233         int r = 1;
234
235         lock(allpaths->lock);
236         mpp = find_mp(allpaths->mpvec, mapname);
237
238         if (!mpp)
239                 goto out;
240
241         free_pgvec(mpp->pg, KEEP_PATHS);
242         mpp->pg = NULL;
243
244         setup_multipath(allpaths, mpp);
245
246         /*
247          * compare checkers states with DM states
248          */
249         vector_foreach_slot (mpp->pg, pgp, i) {
250                 vector_foreach_slot (pgp->paths, pp, j) {
251                         if (pp->dmstate != PSTATE_FAILED)
252                                 continue;
253
254                         if (pp->state != PATH_DOWN) {
255                                 log_safe(LOG_NOTICE, "%s: mark as failed",
256                                         pp->dev_t);
257                                 pp->state = PATH_DOWN;
258
259                                 /*
260                                  * if opportune,
261                                  * schedule the next check earlier
262                                  */
263                                 if (pp->tick > conf->checkint)
264                                         pp->tick = conf->checkint;
265                         }
266                 }
267         }
268         r = 0;
269 out:
270         unlock(allpaths->lock);
271
272         if (r)
273                 log_safe(LOG_ERR, "failed to update multipath");
274
275         return r;
276 }
277
278 /*
279  * returns the reschedule delay
280  * negative means *stop*
281  */
282 static int
283 waiteventloop (struct event_thread * waiter)
284 {
285         struct dm_task *dmt;
286         int event_nr;
287         int r = 1; /* upon problem reschedule 1s later */
288
289         if (!waiter->event_nr)
290                 waiter->event_nr = dm_geteventnr(waiter->mapname);
291
292         if (!(dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
293                 goto out;
294
295         if (!dm_task_set_name(dmt, waiter->mapname))
296                 goto out;
297
298         if (waiter->event_nr && !dm_task_set_event_nr(dmt, waiter->event_nr))
299                 goto out;
300
301         dm_task_no_open_count(dmt);
302
303         dm_task_run(dmt);
304
305         waiter->event_nr++;
306
307         /*
308          * upon event ...
309          */
310         while (1) {
311                 log_safe(LOG_NOTICE, "devmap event (%i) on %s",
312                                 waiter->event_nr, waiter->mapname);
313
314                 /*
315                  * event might be :
316                  *
317                  * 1) a table reload, which means our mpp structure is
318                  *    obsolete : refresh it through update_multipath()
319                  * 2) a path failed by DM : mark as such through
320                  *    update_multipath()
321                  * 3) map has gone away : stop the thread.
322                  * 4) a path reinstate : nothing to do
323                  * 5) a switch group : nothing to do
324                  */
325                 if (update_multipath(waiter->allpaths, waiter->mapname)) {
326                         r = -1; /* stop the thread */
327                         goto out;
328                 }
329                 event_nr = dm_geteventnr(waiter->mapname);
330
331                 if (waiter->event_nr == event_nr)
332                         break;
333
334                 waiter->event_nr = event_nr;
335         }
336
337 out:
338         dm_task_destroy(dmt);
339         return r;
340 }
341
342 static void *
343 waitevent (void * et)
344 {
345         int r;
346         struct event_thread *waiter;
347
348         mlockall(MCL_CURRENT | MCL_FUTURE);
349
350         waiter = (struct event_thread *)et;
351         pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
352
353         while (1) {
354                 r = waiteventloop(waiter);
355
356                 if (r < 0)
357                         break;
358
359                 sleep(r);
360         }
361
362         pthread_exit(waiter->thread);
363
364         return NULL;
365 }
366
367 static void
368 free_waiter (struct event_thread * wp)
369 {
370         free(wp->thread);
371         free(wp);
372 }
373
374 static int
375 stop_waiter_thread (struct multipath * mpp, struct paths * allpaths)
376 {
377         struct event_thread * wp;
378
379         if (!mpp)
380                 return 0;
381
382         wp = (struct event_thread *)mpp->waiter;
383
384         if (!wp)
385                 return 1;
386
387         log_safe(LOG_NOTICE, "reap event checker : %s",
388                 wp->mapname);
389
390         pthread_cancel(*wp->thread);
391         free_waiter(wp);
392
393         return 0;
394 }
395
396 static int
397 start_waiter_thread (struct multipath * mpp, struct paths * allpaths)
398 {
399         pthread_attr_t attr;
400         struct event_thread * wp;
401
402         if (!mpp)
403                 return 0;
404
405         if (pthread_attr_init(&attr))
406                 return 1;
407
408         pthread_attr_setstacksize(&attr, 32 * 1024);
409         wp = alloc_waiter();
410
411         if (!wp)
412                 return 1;
413
414         mpp->waiter = (void *)wp;
415         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
416         wp->allpaths = allpaths;
417
418         if (pthread_create(wp->thread, &attr, waitevent, wp)) {
419                 log_safe(LOG_ERR, "%s: cannot create event checker",
420                          wp->mapname);
421                 goto out;
422         }
423         log_safe(LOG_NOTICE, "%s: event checker started", wp->mapname);
424
425         return 0;
426 out:
427         free_waiter(wp);
428         mpp->waiter = NULL;
429         log_safe(LOG_ERR, "failed to start waiter thread");
430         return 1;
431 }
432
433 static void
434 remove_map (struct multipath * mpp, struct paths * allpaths)
435 {
436         int i;
437
438         stop_waiter_thread(mpp, allpaths);
439         i = find_slot(allpaths->mpvec, (void *)mpp);
440         vector_del_slot(allpaths->mpvec, i);
441         free_multipath(mpp, KEEP_PATHS);
442 }
443
444 static int
445 uev_add_map (char * devname, struct paths * allpaths)
446 {
447         int major, minor;
448         char dev_t[BLK_DEV_SIZE];
449         char * buff;
450         struct multipath * mpp;
451
452         if (sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE))
453                 return 1;
454
455         if (sscanf(dev_t, "%d:%d", &major, &minor) != 2)
456                 return 1;
457
458         buff = dm_mapname(major, minor, "multipath");
459                 
460         if (!buff)
461                 return 1;
462         
463         mpp = find_mp(allpaths->mpvec, buff);
464
465         if (mpp) {
466                 /*
467                  * devmap already in mpvec
468                  * but remove DM uevent are somewhet unreliable
469                  * so for now consider safer to remove and re-add the map
470                  */
471                 log_safe(LOG_NOTICE, "%s: remove dead config", mpp->alias);
472                 remove_map(mpp, allpaths);
473                 mpp = NULL;
474         }
475         if (!mpp) {
476                 mpp = alloc_multipath();
477
478                 if (!mpp)
479                         return 1;
480
481                 mpp->minor = minor;
482                 mpp->alias = MALLOC(strlen(buff) + 1);
483
484                 if (!mpp->alias)
485                         goto out;
486
487                 strncat(mpp->alias, buff, strlen(buff));
488
489                 dm_get_map(mpp->alias, &mpp->size, mpp->params);
490                 dm_get_status(mpp->alias, mpp->status);
491
492                 if (setup_multipath(allpaths, mpp))
493                         return 1; /* mpp freed in setup_multipath */
494
495                 if (!vector_alloc_slot(allpaths->mpvec))
496                         goto out;
497
498                 vector_set_slot(allpaths->mpvec, mpp);
499                 set_paths_owner(allpaths, mpp);
500
501                 if (start_waiter_thread(mpp, allpaths))
502                         goto out;
503         }
504         return 0;
505 out:
506         free_multipath(mpp, KEEP_PATHS);
507         return 1;
508 }
509
510 static int
511 uev_remove_map (char * devname, struct paths * allpaths)
512 {
513         int minor;
514         struct multipath * mpp;
515
516         mpp->minor = atoi(devname + 3);
517         mpp = find_mp_by_minor(allpaths->mpvec, minor);
518
519         if (mpp)
520                 remove_map(mpp, allpaths);
521
522         return 0;
523 }
524
525 static int
526 uev_add_path (char * devname, struct paths * allpaths)
527 {
528         struct path * pp;
529
530         pp = find_path_by_dev(allpaths->pathvec, devname);
531
532         if (pp) {
533                 log_safe(LOG_INFO, "%s: already in pathvec");
534                 return 0;
535         }
536         log_safe(LOG_NOTICE, "add %s path checker", devname);
537         pp = store_pathinfo(allpaths->pathvec, conf->hwtable,
538                        devname, DI_SYSFS | DI_WWID);
539
540         if (!pp)
541                 return 1;
542
543         pp->mpp = find_mp_by_wwid(allpaths->mpvec, pp->wwid);
544         log_safe(LOG_DEBUG, "%s: ownership set to %s",
545                  pp->dev_t, pp->mpp->alias);
546
547         return 0;
548 }
549
550 static int
551 uev_remove_path (char * devname, struct paths * allpaths)
552 {
553         int i;
554         struct path * pp;
555
556         pp = find_path_by_dev(allpaths->pathvec, devname);
557
558         if (!pp) {
559                 log_safe(LOG_INFO, "%s: not in pathvec");
560                 return 0;
561         }
562         log_safe(LOG_NOTICE, "remove %s path checker", devname);
563         i = find_slot(allpaths->pathvec, (void *)pp);
564         vector_del_slot(allpaths->pathvec, i);
565         free_path(pp);
566
567         return 0;
568 }
569
570 int 
571 uev_trigger (struct uevent * uev, void * trigger_data)
572 {
573         int r = 0;
574         char devname[32];
575         struct paths * allpaths;
576
577         allpaths = (struct paths *)trigger_data;
578         lock(allpaths->lock);
579
580         if (strncmp(uev->devpath, "/block", 6))
581                 goto out;
582
583         basename(uev->devpath, devname);
584
585         /*
586          * device map add/remove event
587          */
588         if (!strncmp(devname, "dm-", 3)) {
589                 condlog(2, "%s %s devmap", uev->action, devname);
590
591                 if (!strncmp(uev->action, "add", 3)) {
592                         r = uev_add_map(devname, allpaths);
593                         goto out;
594                 }
595                 if (!strncmp(uev->action, "remove", 6)) {
596                         r = uev_remove_map(devname, allpaths);
597                         goto out;
598                 }
599                 goto out;
600         }
601         
602         /*
603          * path add/remove event
604          */
605         if (blacklist(conf->blist, devname))
606                 goto out;
607
608         if (!strncmp(uev->action, "add", 3)) {
609                 r = uev_add_path(devname, allpaths);
610                 goto out;
611         }
612         if (!strncmp(uev->action, "remove", 6)) {
613                 r = uev_remove_path(devname, allpaths);
614                 goto out;
615         }
616
617 out:
618         FREE(uev);
619         unlock(allpaths->lock);
620         return r;
621 }
622
623 static void *
624 ueventloop (void * ap)
625 {
626         uevent_listen(&uev_trigger, ap);
627
628         return NULL;
629 }
630
631 static void
632 strvec_free (vector vec)
633 {
634         int i;
635         char * str;
636
637         vector_foreach_slot (vec, str, i)
638                 if (str)
639                         FREE(str);
640
641         vector_free(vec);
642 }
643
644 static int
645 exit_daemon (int status)
646 {
647         if (status != 0)
648                 fprintf(stderr, "bad exit status. see daemon.log\n");
649
650         log_safe(LOG_INFO, "umount ramfs");
651         umount(CALLOUT_DIR);
652
653         log_safe(LOG_INFO, "unlink pidfile");
654         unlink(DEFAULT_PIDFILE);
655
656         log_safe(LOG_NOTICE, "--------shut down-------");
657         log_thread_stop();
658         exit(status);
659 }
660
661 /*
662  * caller must have locked the path list before calling that function
663  */
664 static int
665 get_dm_mpvec (struct paths * allpaths)
666 {
667         int i;
668         struct multipath * mpp;
669
670         if (dm_get_maps(allpaths->mpvec, "multipath"))
671                 return 1;
672
673         vector_foreach_slot (allpaths->mpvec, mpp, i) {
674                 setup_multipath(allpaths, mpp);
675                 mpp->minor = dm_get_minor(mpp->alias);
676                 start_waiter_thread(mpp, allpaths);
677         }
678
679         return 0;
680 }
681
682 static void
683 fail_path (struct path * pp)
684 {
685         if (!pp->mpp)
686                 return;
687
688         log_safe(LOG_NOTICE, "checker failed path %s in map %s",
689                  pp->dev_t, pp->mpp->alias);
690
691         dm_fail_path(pp->mpp->alias, pp->dev_t);
692 }
693
694 /*
695  * caller must have locked the path list before calling that function
696  */
697 static void
698 reinstate_path (struct path * pp)
699 {
700         if (pp->mpp) {
701                 if (dm_reinstate(pp->mpp->alias, pp->dev_t))
702                         log_safe(LOG_ERR, "%s: reinstate failed", pp->dev_t);
703                 else
704                         log_safe(LOG_NOTICE, "%s: reinstated", pp->dev_t);
705         }
706 }
707
708 static void *
709 checkerloop (void *ap)
710 {
711         struct paths *allpaths;
712         struct path *pp;
713         int i;
714         int newstate;
715         char checker_msg[MAX_CHECKER_MSG_SIZE];
716
717         mlockall(MCL_CURRENT | MCL_FUTURE);
718
719         memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
720         allpaths = (struct paths *)ap;
721
722         log_safe(LOG_NOTICE, "path checkers start up");
723
724         while (1) {
725                 lock(allpaths->lock);
726                 log_safe(LOG_DEBUG, "tick");
727
728                 vector_foreach_slot (allpaths->pathvec, pp, i) {
729                         if (pp->tick) {
730                                 /*
731                                  * don't check this path yet
732                                  */
733                                 pp->tick--;
734                                 continue;
735                         }
736
737                         /*
738                          * provision a next check soonest,
739                          * in case we exit abnormaly from here
740                          */
741                         pp->tick = conf->checkint;
742                         
743                         if (!pp->checkfn) {
744                                 pathinfo(pp, conf->hwtable, DI_SYSFS);
745                                 select_checkfn(pp);
746                         }
747
748                         if (!pp->checkfn) {
749                                 log_safe(LOG_ERR, "%s: checkfn is void",
750                                          pp->dev);
751                                 continue;
752                         }
753                         newstate = pp->checkfn(pp->fd, checker_msg,
754                                                &pp->checker_context);
755                         
756                         if (newstate != pp->state) {
757                                 pp->state = newstate;
758                                 LOG_MSG(checker_msg, pp->dev_t);
759
760                                 /*
761                                  * upon state change, reset the checkint
762                                  * to the shortest delay
763                                  */
764                                 pp->checkint = conf->checkint;
765
766                                 if (newstate == PATH_DOWN ||
767                                     newstate == PATH_SHAKY) {
768                                         /*
769                                          * proactively fail path in the DM
770                                          */
771                                         fail_path(pp);
772
773                                         /*
774                                          * cancel scheduled failback
775                                          */
776                                         pp->mpp->failback_tick = 0;
777                                         continue;
778                                 }
779
780                                 /*
781                                  * reinstate this path
782                                  */
783                                 reinstate_path(pp);
784
785                                 /*
786                                  * need to switch group ?
787                                  */
788                                 update_multipath_strings(pp->mpp,
789                                                          allpaths->pathvec);
790
791                                 /*
792                                  * schedule defered failback
793                                  */
794                                 if (pp->mpp->pgfailback > 0)
795                                         pp->mpp->failback_tick =
796                                                 pp->mpp->pgfailback;
797
798                                 if (pp->mpp->pgfailback == FAILBACK_IMMEDIATE)
799                                         switch_pathgroup(pp->mpp);
800                         }
801                         else if (newstate == PATH_UP) {
802                                 /*
803                                  * PATH_UP for last two checks
804                                  * defered failback getting sooner
805                                  */
806                                 if (pp->mpp->pgfailback > 0) {
807                                         if (pp->mpp->failback_tick > 0) {
808                                                 pp->mpp->failback_tick--;
809
810                                                 if (!pp->mpp->failback_tick)
811                                                         switch_pathgroup(pp->mpp);
812                                         }
813                                 }
814                                 
815                                 /*
816                                  * and double the next check delay.
817                                  * max at conf->max_checkint
818                                  */
819                                 if (pp->checkint < (conf->max_checkint / 2))
820                                         pp->checkint = 2 * pp->checkint;
821                                 else
822                                         pp->checkint = conf->max_checkint;
823
824                                 pp->tick = pp->checkint;
825                                 log_safe(LOG_DEBUG, "%s: delay next check %is",
826                                                 pp->dev_t, pp->tick);
827                         }
828                         pp->state = newstate;
829                 }
830                 unlock(allpaths->lock);
831                 sleep(1);
832         }
833         return NULL;
834 }
835
836 static struct paths *
837 init_paths (void)
838 {
839         struct paths *allpaths;
840
841         allpaths = MALLOC(sizeof(struct paths));
842
843         if (!allpaths)
844                 return NULL;
845
846         allpaths->lock = 
847                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
848
849         if (!allpaths->lock)
850                 goto out;
851
852         allpaths->pathvec = vector_alloc();
853
854         if (!allpaths->pathvec)
855                 goto out1;
856                 
857         allpaths->mpvec = vector_alloc();
858
859         if (!allpaths->mpvec)
860                 goto out2;
861         
862         pthread_mutex_init(allpaths->lock, NULL);
863
864         return allpaths;
865
866 out2:
867         vector_free(allpaths->pathvec);
868 out1:
869         FREE(allpaths->lock);
870 out:
871         FREE(allpaths);
872         log_safe(LOG_ERR, "failed to init paths");
873         return NULL;
874 }
875
876 /*
877  * this logic is all about keeping callouts working in case of
878  * system disk outage (think system over SAN)
879  * this needs the clone syscall, so don't bother if not present
880  * (Debian Woody)
881  */
882 #ifdef CLONE_NEWNS
883 static int
884 prepare_namespace(void)
885 {
886         mode_t mode = S_IRWXU;
887         struct stat *buf;
888         char ramfs_args[64];
889         int i;
890         int fd;
891         char * bin;
892         size_t size = 10;
893         struct stat statbuf;
894         
895         buf = MALLOC(sizeof(struct stat));
896
897         /*
898          * create a temp mount point for ramfs
899          */
900         if (stat(CALLOUT_DIR, buf) < 0) {
901                 if (mkdir(CALLOUT_DIR, mode) < 0) {
902                         log_safe(LOG_ERR, "cannot create " CALLOUT_DIR);
903                         return -1;
904                 }
905                 log_safe(LOG_DEBUG, "created " CALLOUT_DIR);
906         }
907
908         /*
909          * compute the optimal ramdisk size
910          */
911         vector_foreach_slot (conf->binvec, bin,i) {
912                 if ((fd = open(bin, O_RDONLY)) < 0) {
913                         log_safe(LOG_ERR, "cannot open %s", bin);
914                         return -1;
915                 }
916                 if (fstat(fd, &statbuf) < 0) {
917                         log_safe(LOG_ERR, "cannot stat %s", bin);
918                         return -1;
919                 }
920                 size += statbuf.st_size;
921                 close(fd);
922         }
923         log_safe(LOG_INFO, "ramfs maxsize is %u", (unsigned int) size);
924         
925         /*
926          * mount the ramfs
927          */
928         if (safe_sprintf(ramfs_args, "maxsize=%u", (unsigned int) size)) {
929                 fprintf(stderr, "ramfs_args too small\n");
930                 return -1;
931         }
932         if (mount(NULL, CALLOUT_DIR, "ramfs", MS_SYNCHRONOUS, ramfs_args) < 0) {
933                 log_safe(LOG_ERR, "cannot mount ramfs on " CALLOUT_DIR);
934                 return -1;
935         }
936         log_safe(LOG_DEBUG, "mount ramfs on " CALLOUT_DIR);
937
938         /*
939          * populate the ramfs with callout binaries
940          */
941         vector_foreach_slot (conf->binvec, bin,i) {
942                 if (copytodir(bin, CALLOUT_DIR) < 0) {
943                         log_safe(LOG_ERR, "cannot copy %s in ramfs", bin);
944                         exit_daemon(1);
945                 }
946                 log_safe(LOG_DEBUG, "cp %s in ramfs", bin);
947         }
948         strvec_free(conf->binvec);
949
950         /*
951          * bind the ramfs to :
952          * /sbin : default home of multipath ...
953          * /bin  : default home of scsi_id ...
954          * /tmp  : home of scsi_id temp files
955          */
956         if (mount(CALLOUT_DIR, "/sbin", NULL, MS_BIND, NULL) < 0) {
957                 log_safe(LOG_ERR, "cannot bind ramfs on /sbin");
958                 return -1;
959         }
960         log_safe(LOG_DEBUG, "bind ramfs on /sbin");
961         if (mount(CALLOUT_DIR, "/bin", NULL, MS_BIND, NULL) < 0) {
962                 log_safe(LOG_ERR, "cannot bind ramfs on /bin");
963                 return -1;
964         }
965         log_safe(LOG_DEBUG, "bind ramfs on /bin");
966         if (mount(CALLOUT_DIR, "/tmp", NULL, MS_BIND, NULL) < 0) {
967                 log_safe(LOG_ERR, "cannot bind ramfs on /tmp");
968                 return -1;
969         }
970         log_safe(LOG_DEBUG, "bind ramfs on /tmp");
971
972         return 0;
973 }
974 #endif
975
976 static void *
977 signal_set(int signo, void (*func) (int))
978 {
979         int r;
980         struct sigaction sig;
981         struct sigaction osig;
982
983         sig.sa_handler = func;
984         sigemptyset(&sig.sa_mask);
985         sig.sa_flags = 0;
986
987         r = sigaction(signo, &sig, &osig);
988
989         if (r < 0)
990                 return (SIG_ERR);
991         else
992                 return (osig.sa_handler);
993 }
994
995 static void
996 sighup (int sig)
997 {
998         log_safe(LOG_NOTICE, "SIGHUP received");
999
1000 #ifdef _DEBUG_
1001         dbg_free_final(NULL);
1002 #endif
1003 }
1004
/*
 * Termination signal handler: shuts the daemon down with exit status 0.
 * The signal number itself is not used.
 */
static void
sigend (int sig)
{
        (void)sig;

        exit_daemon(0);
}
1010
1011 static void
1012 signal_init(void)
1013 {
1014         signal_set(SIGHUP, sighup);
1015         signal_set(SIGINT, sigend);
1016         signal_set(SIGTERM, sigend);
1017         signal_set(SIGKILL, sigend);
1018 }
1019
1020 static void
1021 setscheduler (void)
1022 {
1023         int res;
1024         static struct sched_param sched_param = {
1025                 sched_priority: 99
1026         };
1027
1028         res = sched_setscheduler (0, SCHED_RR, &sched_param);
1029
1030         if (res == -1)
1031                 log_safe(LOG_WARNING, "Could not set SCHED_RR at priority 99");
1032         return;
1033 }
1034
/*
 * Write val to /proc/self/oom_adj.  Best effort: nothing happens when the
 * file cannot be opened, and write errors are not reported.
 */
static void
set_oom_adj (int val)
{
        FILE * oom = fopen("/proc/self/oom_adj", "w");

        if (oom) {
                fprintf(oom, "%i", val);
                fclose(oom);
        }
}
1048         
1049 static int
1050 child (void * param)
1051 {
1052         pthread_t check_thr, uevent_thr;
1053         pthread_attr_t attr;
1054         struct paths * allpaths;
1055
1056         mlockall(MCL_CURRENT | MCL_FUTURE);
1057
1058         log_thread_start();
1059         log_safe(LOG_NOTICE, "--------start up--------");
1060         log_safe(LOG_NOTICE, "read " DEFAULT_CONFIGFILE);
1061
1062         if (load_config(DEFAULT_CONFIGFILE))
1063                 exit(1);
1064
1065         setlogmask(LOG_UPTO(conf->verbosity + 3));
1066
1067         /*
1068          * fill the voids left in the config file
1069          */
1070         if (!conf->binvec) {
1071                 conf->binvec = vector_alloc();
1072                 push_callout("/sbin/scsi_id");
1073         }
1074         if (!conf->multipath) {
1075                 conf->multipath = MULTIPATH;
1076                 push_callout(conf->multipath);
1077         }
1078         if (!conf->checkint) {
1079                 conf->checkint = CHECKINT;
1080                 conf->max_checkint = MAX_CHECKINT;
1081         }
1082
1083         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1084                 log_thread_stop();
1085                 exit(1);
1086         }
1087         signal_init();
1088         setscheduler();
1089         set_oom_adj(-17);
1090         allpaths = init_paths();
1091
1092         if (!allpaths)
1093                 exit(1);
1094
1095         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1096                 log_safe(LOG_ERR, "can not find sysfs mount point");
1097                 exit(1);
1098         }
1099
1100 #ifdef CLONE_NEWNS
1101         if (prepare_namespace() < 0) {
1102                 log_safe(LOG_ERR, "cannot prepare namespace");
1103                 exit_daemon(1);
1104         }
1105 #endif
1106
1107         /*
1108          * fetch paths and multipaths lists
1109          * no paths and/or no multipaths are valid scenarii
1110          * vectors maintenance will be driven by events
1111          */
1112         path_discovery(allpaths->pathvec, conf, DI_SYSFS | DI_WWID);
1113         get_dm_mpvec(allpaths);
1114
1115         /*
1116          * start threads
1117          */
1118         pthread_attr_init(&attr);
1119         pthread_attr_setstacksize(&attr, 64 * 1024);
1120         
1121         pthread_create(&check_thr, &attr, checkerloop, allpaths);
1122         pthread_create(&uevent_thr, &attr, ueventloop, allpaths);
1123         pthread_join(check_thr, NULL);
1124         pthread_join(uevent_thr, NULL);
1125
1126         return 0;
1127 }
1128
1129 int
1130 main (int argc, char *argv[])
1131 {
1132         extern char *optarg;
1133         extern int optind;
1134         int arg;
1135         int err;
1136         void * child_stack;
1137         
1138         if (getuid() != 0) {
1139                 fprintf(stderr, "need to be root\n");
1140                 exit(1);
1141         }
1142
1143         /* make sure we don't lock any path */
1144         chdir("/");
1145         umask(umask(077) | 022);
1146
1147         child_stack = (void *)malloc(CHILD_STACK_SIZE);
1148
1149         if (!child_stack)
1150                 exit(1);
1151
1152         conf = alloc_config();
1153
1154         if (!conf)
1155                 exit(1);
1156
1157         while ((arg = getopt(argc, argv, ":v:")) != EOF ) {
1158         switch(arg) {
1159                 case 'v':
1160                         if (sizeof(optarg) > sizeof(char *) ||
1161                             !isdigit(optarg[0]))
1162                                 exit(1);
1163
1164                         conf->verbosity = atoi(optarg);
1165                         break;
1166                 default:
1167                         ;
1168                 }
1169         }
1170
1171 #ifdef CLONE_NEWNS      /* recent systems have clone() */
1172
1173 #    if defined(__hppa__) || defined(__powerpc64__)
1174         err = clone(child, child_stack, CLONE_NEWNS, NULL);
1175 #    elif defined(__ia64__)
1176         err = clone2(child, child_stack,
1177                      CHILD_STACK_SIZE, CLONE_NEWNS, NULL,
1178                      NULL, NULL, NULL);
1179 #    else
1180         err = clone(child, child_stack + CHILD_STACK_SIZE, CLONE_NEWNS, NULL);
1181 #    endif
1182         if (err < 0)
1183                 exit (1);
1184
1185         exit(0);
1186 #else                   /* older system fallback to fork() */
1187         err = fork();
1188         
1189         if (err < 0)
1190                 exit (1);
1191
1192         return (child(child_stack));
1193 #endif
1194
1195 }