[libmultipath] move coalesce_path() to libmultipath/configure.c
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15
16 /*
17  * libsysfs
18  */
19 #include <sysfs/libsysfs.h>
20 #include <sysfs/dlist.h>
21
22 /*
23  * libcheckers
24  */
25 #include <checkers.h>
26 #include <path_state.h>
27
28 /*
29  * libmultipath
30  */
31 #include <parser.h>
32 #include <vector.h>
33 #include <memory.h>
34 #include <config.h>
35 #include <callout.h>
36 #include <util.h>
37 #include <blacklist.h>
38 #include <hwtable.h>
39 #include <defaults.h>
40 #include <structs.h>
41 #include <dmparser.h>
42 #include <devmapper.h>
43 #include <dict.h>
44 #include <discovery.h>
45 #include <debug.h>
46 #include <propsel.h>
47 #include <uevent.h>
48 #include <switchgroup.h>
49 #include <path_state.h>
50 #include <print.h>
51
52 #include "main.h"
53 #include "pidfile.h"
54 #include "uxlsnr.h"
55 #include "uxclnt.h"
56 #include "cli.h"
57 #include "cli_handlers.h"
58
/* Fixed buffer sizes used by this daemon. */
#define FILE_NAME_SIZE 256
#define CMDSIZE 160

/*
 * LOG_MSG(a, b): if the message buffer `b` is non-empty, log it at
 * verbosity `a` prefixed with pp->dev_t, then clear the buffer.
 *
 * The expansion references a variable named `pp`, which must be in scope
 * at the call site.  `b` must be a writable buffer of at least
 * MAX_CHECKER_MSG_SIZE bytes, since that many bytes are zeroed.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as exactly
 * one statement, including when used in an unbraced if/else.
 */
#define LOG_MSG(a,b) \
	do { \
		if (strlen(b)) { \
			condlog(a, "%s: %s", pp->dev_t, b); \
			memset(b, 0, MAX_CHECKER_MSG_SIZE); \
		} \
	} while (0)
67
/*
 * Mutex helpers.
 *
 * lock(a) / unlock(a) take a pthread_mutex_t pointer and evaluate to the
 * return value of pthread_mutex_lock() / pthread_mutex_unlock().
 * lock_cleanup_pop(a) pops (and runs) the cleanup handler installed by a
 * matching pthread_cleanup_push(); it already carries its own trailing
 * semicolon.
 *
 * With LCKDBG defined, each operation is additionally traced to stderr.
 * The traced lock/unlock variants are written as a single comma
 * expression so they stay one statement inside an unbraced if/else and
 * still yield the pthread return code.
 */
#ifdef LCKDBG
#define lock(a) \
	(fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)), \
	 pthread_mutex_lock(a))
#define unlock(a) \
	(fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)), \
	 pthread_mutex_unlock(a))
#define lock_cleanup_pop(a) \
	fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
	pthread_cleanup_pop(1);
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#define lock_cleanup_pop(a) pthread_cleanup_pop(1);
#endif
83
/*
 * Shutdown notification pair: exit_daemon() takes exit_mutex and signals
 * exit_cond.
 */
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
86
87 /*
88  * structs
89  */
90 struct event_thread {
91         struct dm_task *dmt;
92         pthread_t thread;
93         int event_nr;
94         char mapname[WWID_SIZE];
95         struct vectors *vecs;
96 };
97
98 static struct event_thread *
99 alloc_waiter (void)
100 {
101
102         struct event_thread * wp;
103
104         wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
105
106         return wp;
107 }
108
109 static void
110 free_waiter (void * data)
111 {
112         struct event_thread * wp = (struct event_thread *)data;
113
114         if (wp->dmt)
115                 dm_task_destroy(wp->dmt);
116         FREE(wp);
117 }
118
119 static void
120 stop_waiter_thread (struct multipath * mpp, struct vectors * vecs)
121 {
122         struct event_thread * wp = (struct event_thread *)mpp->waiter;
123         pthread_t thread;
124         
125         if (!wp) {
126                 condlog(3, "%s: no waiter thread", mpp->alias);
127                 return;
128         }
129         thread = wp->thread;
130
131         if (!wp) {
132                 condlog(3, "%s: thread not started", mpp->alias);
133                 return;
134         }
135         condlog(2, "%s: stop event checker thread", wp->mapname);
136         pthread_kill(thread, SIGHUP);
137 }
138
/*
 * pthread cleanup handler: unlock the mutex passed as opaque data.
 */
static void
cleanup_lock (void * data)
{
	pthread_mutex_t * mutex = (pthread_mutex_t *)data;

	pthread_mutex_unlock(mutex);
}
144
145 static void
146 adopt_paths (struct vectors * vecs, struct multipath * mpp)
147 {
148         int i;
149         struct path * pp;
150
151         if (!mpp)
152                 return;
153
154         vector_foreach_slot (vecs->pathvec, pp, i) {
155                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
156                         condlog(4, "%s ownership set", pp->dev_t);
157                         pp->mpp = mpp;
158                 }
159         }
160 }
161
162 static void
163 orphan_path (struct path * pp)
164 {
165         pp->mpp = NULL;
166         pp->checkfn = NULL;
167         pp->dmstate = PSTATE_UNDEF;
168         pp->checker_context = NULL;
169         pp->getuid = NULL;
170         pp->getprio = NULL;
171         pp->getprio_selected = 0;
172
173         if (pp->fd >= 0)
174                 close(pp->fd);
175
176         pp->fd = -1;
177 }
178
179 static void
180 orphan_paths (struct vectors * vecs, struct multipath * mpp)
181 {
182         int i;
183         struct path * pp;
184
185         vector_foreach_slot (vecs->pathvec, pp, i) {
186                 if (pp->mpp == mpp) {
187                         condlog(4, "%s is orphaned", pp->dev_t);
188                         orphan_path(pp);
189                 }
190         }
191 }
192
193 static int
194 update_multipath_table (struct multipath *mpp, vector pathvec)
195 {
196         if (!mpp)
197                 return 1;
198
199         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
200                 return 1;
201
202         if (disassemble_map(pathvec, mpp->params, mpp))
203                 return 1;
204
205         return 0;
206 }
207
208 static int
209 update_multipath_status (struct multipath *mpp)
210 {
211         if (!mpp)
212                 return 1;
213
214         if(dm_get_status(mpp->alias, mpp->status))
215                 return 1;
216
217         if (disassemble_status(mpp->status, mpp))
218                 return 1;
219
220         return 0;
221 }
222
223 static int
224 update_multipath_strings (struct multipath *mpp, vector pathvec)
225 {
226         free_multipath_attributes(mpp);
227         free_pgvec(mpp->pg, KEEP_PATHS);
228         mpp->pg = NULL;
229
230         if (update_multipath_table(mpp, pathvec))
231                 return 1;
232
233         if (update_multipath_status(mpp))
234                 return 1;
235
236         return 0;
237 }
238
239 static void
240 set_multipath_wwid (struct multipath * mpp)
241 {
242         if (mpp->wwid)
243                 return;
244
245         dm_get_uuid(mpp->alias, mpp->wwid);
246 }
247
248 /*
249  * mpp->no_path_retry:
250  *   -2 (QUEUE) : queue_if_no_path enabled, never turned off
251  *   -1 (FAIL)  : fail_if_no_path
252  *    0 (UNDEF) : nothing
253  *   >0         : queue_if_no_path enabled, turned off after polling n times
254  */
255 static void
256 update_queue_mode_del_path(struct multipath *mpp)
257 {
258         if (--mpp->nr_active == 0 && mpp->no_path_retry > 0) {
259                 /*
260                  * Enter retry mode.
261                  * meaning of +1: retry_tick may be decremented in
262                  *                checkerloop before starting retry.
263                  */
264                 mpp->retry_tick = mpp->no_path_retry * conf->checkint + 1;
265                 condlog(1, "%s: Entering recovery mode: max_retries=%d",
266                         mpp->alias, mpp->no_path_retry);
267         }
268         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
269 }
270
271 static void
272 update_queue_mode_add_path(struct multipath *mpp)
273 {
274         if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
275                 /* come back to normal mode from retry mode */
276                 mpp->retry_tick = 0;
277                 dm_queue_if_no_path(mpp->alias, 1);
278                 condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
279                 condlog(1, "%s: Recovered to normal mode", mpp->alias);
280         }
281         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
282 }
283
284 static void
285 set_no_path_retry(struct multipath *mpp)
286 {
287         mpp->retry_tick = 0;
288         mpp->nr_active = pathcount(mpp, PATH_UP);
289         select_no_path_retry(mpp);
290
291         switch (mpp->no_path_retry) {
292         case NO_PATH_RETRY_UNDEF:
293                 break;
294         case NO_PATH_RETRY_FAIL:
295                 dm_queue_if_no_path(mpp->alias, 0);
296                 break;
297         case NO_PATH_RETRY_QUEUE:
298                 dm_queue_if_no_path(mpp->alias, 1);
299                 break;
300         default:
301                 dm_queue_if_no_path(mpp->alias, 1);
302                 if (mpp->nr_active == 0) {
303                         /* Enter retry mode */
304                         mpp->retry_tick = mpp->no_path_retry * conf->checkint;
305                         condlog(1, "%s: Entering recovery mode: max_retries=%d",
306                                 mpp->alias, mpp->no_path_retry);
307                 }
308                 break;
309         }
310 }
311
312 static struct hwentry *
313 extract_hwe_from_path(struct multipath * mpp)
314 {
315         struct path * pp;
316         struct pathgroup * pgp;
317
318         pgp = VECTOR_SLOT(mpp->pg, 0);
319         pp = VECTOR_SLOT(pgp->paths, 0);
320
321         return pp->hwe;
322 }
323
324 static void
325 remove_map (struct multipath * mpp, struct vectors * vecs)
326 {
327         int i;
328
329         stop_waiter_thread(mpp, vecs);
330
331         /*
332          * clear references to this map
333          */
334         orphan_paths(vecs, mpp);
335
336         /*
337          * purge the multipath vector
338          */
339         i = find_slot(vecs->mpvec, (void *)mpp);
340         vector_del_slot(vecs->mpvec, i);
341
342         /*
343          * final free
344          */
345         free_multipath(mpp, KEEP_PATHS);
346         mpp = NULL;
347 }
348
349 static void
350 remove_maps (struct vectors * vecs)
351 {
352         int i;
353         struct multipath * mpp;
354
355         vector_foreach_slot (vecs->mpvec, mpp, i) {
356                 remove_map(mpp, vecs);
357                 i--;
358         }
359
360         vector_free(vecs->mpvec);
361         vecs->mpvec = NULL;
362 }
363
364 static int
365 setup_multipath (struct vectors * vecs, struct multipath * mpp)
366 {
367         if (dm_get_info(mpp->alias, &mpp->dmi))
368                 goto out;
369
370         set_multipath_wwid(mpp);
371         mpp->mpe = find_mpe(mpp->wwid);
372         condlog(4, "discovered map %s", mpp->alias);
373
374         if (update_multipath_strings(mpp, vecs->pathvec))
375                 goto out;
376
377         adopt_paths(vecs, mpp);
378         mpp->hwe = extract_hwe_from_path(mpp);
379         select_pgfailback(mpp);
380         set_no_path_retry(mpp);
381
382         return 0;
383 out:
384         condlog(0, "%s: failed to setup multipath", mpp->alias);
385         remove_map(mpp, vecs);
386         return 1;
387 }
388
389 static int
390 need_switch_pathgroup (struct multipath * mpp, int refresh)
391 {
392         struct pathgroup * pgp;
393         struct path * pp;
394         int i, j;
395
396         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
397                 return 0;
398
399         /*
400          * Refresh path priority values
401          */
402         if (refresh)
403                 vector_foreach_slot (mpp->pg, pgp, i)
404                         vector_foreach_slot (pgp->paths, pp, j)
405                                 pathinfo(pp, conf->hwtable, DI_PRIO);
406
407         mpp->bestpg = select_path_group(mpp);
408
409         if (mpp->bestpg != mpp->nextpg)
410                 return 1;
411
412         return 0;
413 }
414
415 static void
416 switch_pathgroup (struct multipath * mpp)
417 {
418         dm_switchgroup(mpp->alias, mpp->bestpg);
419         condlog(2, "%s: switch to path group #%i",
420                  mpp->alias, mpp->bestpg);
421 }
422
423 static int
424 update_multipath (struct vectors *vecs, char *mapname)
425 {
426         struct multipath *mpp;
427         struct pathgroup  *pgp;
428         struct path *pp;
429         int i, j;
430         int r = 1;
431
432         mpp = find_mp_by_alias(vecs->mpvec, mapname);
433
434         if (!mpp)
435                 goto out;
436
437         free_pgvec(mpp->pg, KEEP_PATHS);
438         mpp->pg = NULL;
439
440         if (setup_multipath(vecs, mpp))
441                 goto out; /* mpp freed in setup_multipath */
442
443         /*
444          * compare checkers states with DM states
445          */
446         vector_foreach_slot (mpp->pg, pgp, i) {
447                 vector_foreach_slot (pgp->paths, pp, j) {
448                         if (pp->dmstate != PSTATE_FAILED)
449                                 continue;
450
451                         if (pp->state != PATH_DOWN) {
452                                 condlog(2, "%s: mark as failed", pp->dev_t);
453                                 pp->state = PATH_DOWN;
454                                 update_queue_mode_del_path(mpp);
455
456                                 /*
457                                  * if opportune,
458                                  * schedule the next check earlier
459                                  */
460                                 if (pp->tick > conf->checkint)
461                                         pp->tick = conf->checkint;
462                         }
463                 }
464         }
465         r = 0;
466 out:
467         if (r)
468                 condlog(0, "failed to update multipath");
469
470         return r;
471 }
472
/*
 * Unblock SIGHUP for the calling thread.
 *
 * Returns the signal mask that was in effect before the change, so the
 * caller can restore it.
 */
static sigset_t unblock_sighup(void)
{
	sigset_t previous;
	sigset_t hup_only;

	sigemptyset(&hup_only);
	sigaddset(&hup_only, SIGHUP);

	pthread_sigmask(SIG_UNBLOCK, &hup_only, &previous);

	return previous;
}
482
483 /*
484  * returns the reschedule delay
485  * negative means *stop*
486  */
487 static int
488 waiteventloop (struct event_thread * waiter)
489 {
490         sigset_t set;
491         int event_nr;
492         int r;
493
494         if (!waiter->event_nr)
495                 waiter->event_nr = dm_geteventnr(waiter->mapname);
496
497         if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
498                 return 1;
499
500         if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
501                 dm_task_destroy(waiter->dmt);
502                 return 1;
503         }
504
505         if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
506                                                       waiter->event_nr)) {
507                 dm_task_destroy(waiter->dmt);
508                 return 1;
509         }
510
511         dm_task_no_open_count(waiter->dmt);
512         
513         /* accept wait interruption */
514         set = unblock_sighup();
515
516         /* interruption spits messages */
517         dm_shut_log();
518
519         /* wait */
520         r = dm_task_run(waiter->dmt);
521
522         /* wait is over : event or interrupt */
523         pthread_sigmask(SIG_SETMASK, &set, NULL);
524         //dm_restore_log();
525
526         if (!r) /* wait interrupted by signal */
527                 return -1;
528
529         dm_task_destroy(waiter->dmt);
530         waiter->dmt = NULL;
531         waiter->event_nr++;
532
533         /*
534          * upon event ...
535          */
536         while (1) {
537                 condlog(3, "%s: devmap event #%i",
538                                 waiter->mapname, waiter->event_nr);
539
540                 /*
541                  * event might be :
542                  *
543                  * 1) a table reload, which means our mpp structure is
544                  *    obsolete : refresh it through update_multipath()
545                  * 2) a path failed by DM : mark as such through
546                  *    update_multipath()
547                  * 3) map has gone away : stop the thread.
548                  * 4) a path reinstate : nothing to do
549                  * 5) a switch group : nothing to do
550                  */
551                 pthread_cleanup_push(cleanup_lock, waiter->vecs->lock);
552                 lock(waiter->vecs->lock);
553                 r = update_multipath(waiter->vecs, waiter->mapname);
554                 lock_cleanup_pop(waiter->vecs->lock);
555
556                 if (r)
557                         return -1; /* stop the thread */
558
559                 event_nr = dm_geteventnr(waiter->mapname);
560
561                 if (waiter->event_nr == event_nr)
562                         return 1; /* upon problem reschedule 1s later */
563
564                 waiter->event_nr = event_nr;
565         }
566         return -1; /* never reach there */
567 }
568
569 static void *
570 waitevent (void * et)
571 {
572         int r;
573         struct event_thread *waiter;
574
575         mlockall(MCL_CURRENT | MCL_FUTURE);
576
577         waiter = (struct event_thread *)et;
578         pthread_cleanup_push(free_waiter, et);
579
580         while (1) {
581                 r = waiteventloop(waiter);
582
583                 if (r < 0)
584                         break;
585
586                 sleep(r);
587         }
588
589         pthread_cleanup_pop(1);
590         return NULL;
591 }
592
593 static int
594 start_waiter_thread (struct multipath * mpp, struct vectors * vecs)
595 {
596         pthread_attr_t attr;
597         struct event_thread * wp;
598
599         if (!mpp)
600                 return 0;
601
602         if (pthread_attr_init(&attr))
603                 goto out;
604
605         pthread_attr_setstacksize(&attr, 32 * 1024);
606         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
607
608         wp = alloc_waiter();
609
610         if (!wp)
611                 goto out;
612
613         mpp->waiter = (void *)wp;
614         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
615         wp->vecs = vecs;
616
617         if (pthread_create(&wp->thread, &attr, waitevent, wp)) {
618                 condlog(0, "%s: cannot create event checker", wp->mapname);
619                 goto out1;
620         }
621         condlog(2, "%s: event checker started", wp->mapname);
622
623         return 0;
624 out1:
625         free_waiter(wp);
626         mpp->waiter = NULL;
627 out:
628         condlog(0, "failed to start waiter thread");
629         return 1;
630 }
631
632 int
633 uev_add_map (char * devname, struct vectors * vecs)
634 {
635         int major, minor;
636         char dev_t[BLK_DEV_SIZE];
637         char * alias;
638         struct multipath * mpp;
639
640         if (sscanf(devname, "dm-%d", &minor) == 1 &&
641             !sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE) &&
642             sscanf(dev_t, "%d:%d", &major, &minor) == 2)
643                 alias = dm_mapname(major, minor);
644         else
645                 alias = STRDUP(devname);
646                 
647         if (!alias)
648                 return 1;
649         
650         if (!dm_type(alias, DEFAULT_TARGET)) {
651                 condlog(4, "%s: not a multipath map", alias);
652                 FREE(alias);
653                 return 0;
654         }
655
656         mpp = find_mp_by_alias(vecs->mpvec, alias);
657
658         if (mpp) {
659                 /*
660                  * this should not happen,
661                  * we missed a remove map event (not sent ?)
662                  */
663                 condlog(2, "%s: already registered", alias);
664                 remove_map(mpp, vecs);
665         }
666
667         /*
668          * now we can allocate
669          */
670         mpp = alloc_multipath();
671
672         if (!mpp)
673                 return 1;
674
675         mpp->alias = alias;
676
677         if (setup_multipath(vecs, mpp))
678                 return 1; /* mpp freed in setup_multipath */
679
680         if (!vector_alloc_slot(vecs->mpvec))
681                 goto out;
682
683         vector_set_slot(vecs->mpvec, mpp);
684         adopt_paths(vecs, mpp);
685
686         if (start_waiter_thread(mpp, vecs))
687                 goto out;
688
689         return 0;
690 out:
691         condlog(2, "%s: add devmap failed", mpp->alias);
692         remove_map(mpp, vecs);
693         return 1;
694 }
695
696 int
697 uev_remove_map (char * devname, struct vectors * vecs)
698 {
699         int minor;
700         struct multipath * mpp;
701
702         if (sscanf(devname, "dm-%d", &minor) == 1)
703                 mpp = find_mp_by_minor(vecs->mpvec, minor);
704         else
705                 mpp = find_mp_by_alias(vecs->mpvec, devname);
706
707         if (!mpp) {
708                 condlog(3, "%s: devmap not registered, can't remove",
709                         devname);
710                 return 0;
711         }
712
713         condlog(2, "remove %s devmap", mpp->alias);
714         remove_map(mpp, vecs);
715
716         return 0;
717 }
718
719 int
720 uev_add_path (char * devname, struct vectors * vecs)
721 {
722         struct path * pp;
723
724         pp = find_path_by_dev(vecs->pathvec, devname);
725
726         if (pp) {
727                 condlog(3, "%s: already in pathvec");
728                 return 1;
729         }
730         pp = store_pathinfo(vecs->pathvec, conf->hwtable,
731                        devname, DI_SYSFS | DI_WWID);
732
733         if (!pp) {
734                 condlog(0, "%s: failed to store path info", devname);
735                 return 1;
736         }
737
738         condlog(2, "%s: path checker registered", devname);
739         pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
740
741         if (pp->mpp) {
742                 condlog(4, "%s: ownership set to %s",
743                                 pp->dev_t, pp->mpp->alias);
744         } else {
745                 condlog(4, "%s: orphaned", pp->dev_t);
746                 orphan_path(pp);
747         }
748
749         return 0;
750 }
751
752 int
753 uev_remove_path (char * devname, struct vectors * vecs)
754 {
755         int i;
756         struct path * pp;
757
758         pp = find_path_by_dev(vecs->pathvec, devname);
759
760         if (!pp) {
761                 condlog(3, "%s: not in pathvec");
762                 return 1;
763         }
764
765         if (pp->mpp && pp->state == PATH_UP)
766                 update_queue_mode_del_path(pp->mpp);
767
768         condlog(2, "remove %s path checker", devname);
769         i = find_slot(vecs->pathvec, (void *)pp);
770         vector_del_slot(vecs->pathvec, i);
771         free_path(pp);
772
773         return 0;
774 }
775
776 int
777 show_paths (char ** r, int * len, struct vectors * vecs)
778 {
779         int i;
780         struct path * pp;
781         char * c;
782         char * reply;
783         int maxlen = INITIAL_REPLY_LEN;
784         int again = 1;
785
786         get_path_layout(vecs->pathvec);
787         reply = MALLOC(maxlen);
788
789         while (again) {
790                 if (!reply)
791                         return 1;
792
793                 c = reply;
794
795                 if (VECTOR_SIZE(vecs->pathvec) > 0)
796                         c += snprint_path_header(c, reply + maxlen - c,
797                                                  PRINT_PATH_CHECKER);
798
799                 vector_foreach_slot(vecs->pathvec, pp, i)
800                         c += snprint_path(c, reply + maxlen - c,
801                                           PRINT_PATH_CHECKER, pp);
802
803                 again = ((c - reply) == (maxlen - 1));
804
805                 if (again)
806                         reply = REALLOC(reply, maxlen *= 2);
807
808         }
809         *r = reply;
810         *len = (int)(c - reply + 1);
811         return 0;
812 }
813
814 int
815 show_maps (char ** r, int *len, struct vectors * vecs)
816 {
817         int i;
818         struct multipath * mpp;
819         char * c;
820         char * reply;
821         int maxlen = INITIAL_REPLY_LEN;
822         int again = 1;
823
824         get_map_layout(vecs->mpvec);
825         reply = MALLOC(maxlen);
826
827         while (again) {
828                 if (!reply)
829                         return 1;
830
831                 c = reply;
832                 if (VECTOR_SIZE(vecs->mpvec) > 0)
833                         c += snprint_map_header(c, reply + maxlen - c,
834                                                 PRINT_MAP_FAILBACK);
835
836                 vector_foreach_slot(vecs->mpvec, mpp, i)
837                         c += snprint_map(c, reply + maxlen - c,
838                                          PRINT_MAP_FAILBACK, mpp);
839
840                 again = ((c - reply) == (maxlen - 1));
841
842                 if (again)
843                         reply = REALLOC(reply, maxlen *= 2);
844         }
845         *r = reply;
846         *len = (int)(c - reply + 1);
847         return 0;
848 }
849
850 int
851 dump_pathvec (char ** r, int * len, struct vectors * vecs)
852 {
853         int i;
854         struct path * pp;
855         char * reply;
856         char * p;
857
858         *len = VECTOR_SIZE(vecs->pathvec) * sizeof(struct path);
859         reply = (char *)MALLOC(*len);
860         *r = reply;
861
862         if (!reply)
863                 return 1;
864
865         p = reply;
866
867         vector_foreach_slot (vecs->pathvec, pp, i) {
868                 memcpy((void *)p, pp, sizeof(struct path));
869                 p += sizeof(struct path);
870         }
871
872         /* return negative to hint caller not to add "ok" to the dump */
873         return -1;
874 }
875
876 static int
877 map_discovery (struct vectors * vecs)
878 {
879         int i;
880         struct multipath * mpp;
881
882         if (dm_get_maps(vecs->mpvec, "multipath"))
883                 return 1;
884
885         vector_foreach_slot (vecs->mpvec, mpp, i) {
886                 if (setup_multipath(vecs, mpp))
887                         return 1;
888                 start_waiter_thread(mpp, vecs);
889         }
890
891         return 0;
892 }
893
894 int
895 reconfigure (struct vectors * vecs)
896 {
897         struct config * old = conf;
898         struct multipath * mpp;
899         struct path * pp;
900         int i;
901
902         conf = NULL;
903
904         if (load_config(DEFAULT_CONFIGFILE)) {
905                 conf = old;
906                 condlog(2, "reconfigure failed, continue with old config");
907                 return 1;
908         }
909         conf->verbosity = old->verbosity;
910         free_config(old);
911
912         vector_foreach_slot (vecs->mpvec, mpp, i) {
913                 mpp->mpe = find_mpe(mpp->wwid);
914                 mpp->hwe = extract_hwe_from_path(mpp);
915                 adopt_paths(vecs, mpp);
916                 set_no_path_retry(mpp);
917         }
918         vector_foreach_slot (vecs->pathvec, pp, i) {
919                 select_checkfn(pp);
920                 select_getuid(pp);
921                 select_getprio(pp);
922         }
923         condlog(2, "reconfigured");
924         return 0;
925 }
926
927 int
928 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
929 {
930         struct vectors * vecs;
931         int r;
932         
933         *reply = NULL;
934         *len = 0;
935         vecs = (struct vectors *)trigger_data;
936
937         pthread_cleanup_push(cleanup_lock, vecs->lock);
938         lock(vecs->lock);
939
940         r = parse_cmd(str, reply, len, vecs);
941
942         if (r > 0) {
943                 *reply = STRDUP("fail\n");
944                 *len = strlen(*reply) + 1;
945                 r = 1;
946         }
947         else if (!r && *len == 0) {
948                 *reply = STRDUP("ok\n");
949                 *len = strlen(*reply) + 1;
950                 r = 0;
951         }
952         /* else if (r < 0) leave *reply alone */
953
954         lock_cleanup_pop(vecs->lock);
955         return r;
956 }
957
/*
 * Filter uevents by device path.
 *
 * Only whole block devices ("/block/<name>") are kept; anything outside
 * "/block/" and anything with a further path component (partitions) is
 * discarded.
 *
 * Returns 1 if the event must be discarded, 0 if it must be processed.
 */
static int
uev_discard(char * devpath)
{
	/*
	 * The conversions below store up to 10 characters plus the
	 * terminating NUL, so each buffer needs 11 bytes.
	 */
	char a[11], b[11];

	/*
	 * keep only block devices, discard partitions
	 */
	if (sscanf(devpath, "/block/%10s", a) != 1 ||
	    sscanf(devpath, "/block/%10[^/]/%10s", a, b) == 2) {
		condlog(4, "discard event on %s", devpath);
		return 1;
	}
	return 0;
}
973
974 int 
975 uev_trigger (struct uevent * uev, void * trigger_data)
976 {
977         int r = 0;
978         char devname[32];
979         struct vectors * vecs;
980
981         vecs = (struct vectors *)trigger_data;
982
983         if (uev_discard(uev->devpath))
984                 return 1;
985
986         basename(uev->devpath, devname);
987         lock(vecs->lock);
988
989         /*
990          * device map add/remove event
991          */
992         if (!strncmp(devname, "dm-", 3)) {
993                 if (!strncmp(uev->action, "add", 3)) {
994                         r = uev_add_map(devname, vecs);
995                         goto out;
996                 }
997 #if 0
998                 if (!strncmp(uev->action, "remove", 6)) {
999                         r = uev_remove_map(devname, vecs);
1000                         goto out;
1001                 }
1002 #endif
1003                 goto out;
1004         }
1005         
1006         /*
1007          * path add/remove event
1008          */
1009         if (blacklist(conf->blist, devname))
1010                 goto out;
1011
1012         if (!strncmp(uev->action, "add", 3)) {
1013                 r = uev_add_path(devname, vecs);
1014                 goto out;
1015         }
1016         if (!strncmp(uev->action, "remove", 6)) {
1017                 r = uev_remove_path(devname, vecs);
1018                 goto out;
1019         }
1020
1021 out:
1022         unlock(vecs->lock);
1023         return r;
1024 }
1025
1026 static void *
1027 ueventloop (void * ap)
1028 {
1029         if (uevent_listen(&uev_trigger, ap))
1030                 fprintf(stderr, "error starting uevent listener");
1031                 
1032         return NULL;
1033 }
1034
1035 static void *
1036 uxlsnrloop (void * ap)
1037 {
1038         if (load_keys())
1039                 return NULL;
1040         
1041         if (alloc_handlers())
1042                 return NULL;
1043
1044         add_handler(LIST+PATHS, cli_list_paths);
1045         add_handler(LIST+MAPS, cli_list_maps);
1046         add_handler(ADD+PATH, cli_add_path);
1047         add_handler(DEL+PATH, cli_del_path);
1048         add_handler(ADD+MAP, cli_add_map);
1049         add_handler(DEL+MAP, cli_del_map);
1050         add_handler(SWITCH+MAP+GROUP, cli_switch_group);
1051         add_handler(DUMP+PATHVEC, cli_dump_pathvec);
1052         add_handler(RECONFIGURE, cli_reconfigure);
1053         add_handler(SUSPEND+MAP, cli_suspend);
1054         add_handler(RESUME+MAP, cli_resume);
1055         add_handler(REINSTATE+PATH, cli_reinstate);
1056         add_handler(FAIL+PATH, cli_fail);
1057
1058         uxsock_listen(&uxsock_trigger, ap);
1059
1060         return NULL;
1061 }
1062
1063 static int
1064 exit_daemon (int status)
1065 {
1066         if (status != 0)
1067                 fprintf(stderr, "bad exit status. see daemon.log\n");
1068
1069         condlog(3, "unlink pidfile");
1070         unlink(DEFAULT_PIDFILE);
1071
1072         lock(&exit_mutex);
1073         pthread_cond_signal(&exit_cond);
1074         unlock(&exit_mutex);
1075
1076         return status;
1077 }
1078
1079 static void
1080 fail_path (struct path * pp)
1081 {
1082         if (!pp->mpp)
1083                 return;
1084
1085         condlog(2, "checker failed path %s in map %s",
1086                  pp->dev_t, pp->mpp->alias);
1087
1088         dm_fail_path(pp->mpp->alias, pp->dev_t);
1089         update_queue_mode_del_path(pp->mpp);
1090 }
1091
1092 /*
1093  * caller must have locked the path list before calling that function
1094  */
1095 static void
1096 reinstate_path (struct path * pp)
1097 {
1098         if (!pp->mpp)
1099                 return;
1100
1101         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
1102                 condlog(0, "%s: reinstate failed", pp->dev_t);
1103         else {
1104                 condlog(2, "%s: reinstated", pp->dev_t);
1105                 update_queue_mode_add_path(pp->mpp);
1106         }
1107 }
1108
1109 static void
1110 enable_group(struct path * pp)
1111 {
1112         struct pathgroup * pgp;
1113
1114         /*
1115          * if path is added through uev_add_path, pgindex can be unset.
1116          * next update_strings() will set it, upon map reload event.
1117          *
1118          * we can safely return here, because upon map reload, all
1119          * PG will be enabled.
1120          */
1121         if (!pp->mpp->pg || !pp->pgindex)
1122                 return;
1123
1124         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1125         
1126         if (pgp->status == PGSTATE_DISABLED) {
1127                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1128                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1129         }
1130 }
1131
1132 static void
1133 mpvec_garbage_collector (struct vectors * vecs)
1134 {
1135         struct multipath * mpp;
1136         int i;
1137
1138         vector_foreach_slot (vecs->mpvec, mpp, i) {
1139                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1140                         condlog(2, "%s: remove dead map", mpp->alias);
1141                         remove_map(mpp, vecs);
1142                         i--;
1143                 }
1144         }
1145 }
1146
1147 static void
1148 defered_failback_tick (vector mpvec)
1149 {
1150         struct multipath * mpp;
1151         int i;
1152
1153         vector_foreach_slot (mpvec, mpp, i) {
1154                 /*
1155                  * defered failback getting sooner
1156                  */
1157                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1158                         mpp->failback_tick--;
1159
1160                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1161                                 switch_pathgroup(mpp);
1162                 }
1163         }
1164 }
1165
1166 static void
1167 retry_count_tick(vector mpvec)
1168 {
1169         struct multipath *mpp;
1170         int i;
1171
1172         vector_foreach_slot (mpvec, mpp, i) {
1173                 if (mpp->retry_tick) {
1174                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1175                         if(--mpp->retry_tick == 0) {
1176                                 dm_queue_if_no_path(mpp->alias, 0);
1177                                 condlog(2, "%s: Disable queueing", mpp->alias);
1178                         }
1179                 }
1180         }
1181 }
1182
1183 static void *
1184 checkerloop (void *ap)
1185 {
1186         struct vectors *vecs;
1187         struct path *pp;
1188         int i, count = 0;
1189         int newstate;
1190         char checker_msg[MAX_CHECKER_MSG_SIZE];
1191
1192         mlockall(MCL_CURRENT | MCL_FUTURE);
1193
1194         memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
1195         vecs = (struct vectors *)ap;
1196
1197         condlog(2, "path checkers start up");
1198
1199         /*
1200          * init the path check interval
1201          */
1202         vector_foreach_slot (vecs->pathvec, pp, i) {
1203                 pp->checkint = conf->checkint;
1204         }
1205
1206         while (1) {
1207                 pthread_cleanup_push(cleanup_lock, vecs->lock);
1208                 lock(vecs->lock);
1209                 condlog(4, "tick");
1210
1211                 vector_foreach_slot (vecs->pathvec, pp, i) {
1212                         if (!pp->mpp)
1213                                 continue;
1214
1215                         if (pp->tick && --pp->tick)
1216                                 continue; /* don't check this path yet */
1217
1218                         /*
1219                          * provision a next check soonest,
1220                          * in case we exit abnormaly from here
1221                          */
1222                         pp->tick = conf->checkint;
1223                         
1224                         if (!pp->checkfn) {
1225                                 pathinfo(pp, conf->hwtable, DI_SYSFS);
1226                                 select_checkfn(pp);
1227                         }
1228
1229                         if (!pp->checkfn) {
1230                                 condlog(0, "%s: checkfn is void", pp->dev);
1231                                 continue;
1232                         }
1233                         newstate = pp->checkfn(pp->fd, checker_msg,
1234                                                &pp->checker_context);
1235                         
1236                         if (newstate < 0) {
1237                                 condlog(2, "%s: unusable path", pp->dev);
1238                                 pathinfo(pp, conf->hwtable, 0);
1239                                 continue;
1240                         }
1241
1242                         if (newstate != pp->state) {
1243                                 pp->state = newstate;
1244                                 LOG_MSG(1, checker_msg);
1245
1246                                 /*
1247                                  * upon state change, reset the checkint
1248                                  * to the shortest delay
1249                                  */
1250                                 pp->checkint = conf->checkint;
1251
1252                                 if (newstate == PATH_DOWN ||
1253                                     newstate == PATH_SHAKY ||
1254                                     update_multipath_strings(pp->mpp,
1255                                                              vecs->pathvec)) {
1256                                         /*
1257                                          * proactively fail path in the DM
1258                                          */
1259                                         fail_path(pp);
1260
1261                                         /*
1262                                          * cancel scheduled failback
1263                                          */
1264                                         pp->mpp->failback_tick = 0;
1265
1266                                         continue;
1267                                 }
1268
1269                                 /*
1270                                  * reinstate this path
1271                                  */
1272                                 reinstate_path(pp);
1273
1274                                 /*
1275                                  * schedule [defered] failback
1276                                  */
1277                                 if (pp->mpp->pgfailback > 0)
1278                                         pp->mpp->failback_tick =
1279                                                 pp->mpp->pgfailback + 1;
1280                                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
1281                                     need_switch_pathgroup(pp->mpp, 1))
1282                                         switch_pathgroup(pp->mpp);
1283
1284                                 /*
1285                                  * if at least one path is up in a group, and
1286                                  * the group is disabled, re-enable it
1287                                  */
1288                                 if (newstate == PATH_UP)
1289                                         enable_group(pp);
1290                         }
1291                         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1292                                 LOG_MSG(4, checker_msg);
1293                                 /*
1294                                  * double the next check delay.
1295                                  * max at conf->max_checkint
1296                                  */
1297                                 if (pp->checkint < (conf->max_checkint / 2))
1298                                         pp->checkint = 2 * pp->checkint;
1299                                 else
1300                                         pp->checkint = conf->max_checkint;
1301
1302                                 pp->tick = pp->checkint;
1303                                 condlog(4, "%s: delay next check %is",
1304                                                 pp->dev_t, pp->tick);
1305
1306                         }
1307                         pp->state = newstate;
1308
1309                         /*
1310                          * path prio refreshing
1311                          */
1312                         condlog(4, "path prio refresh");
1313                         pathinfo(pp, conf->hwtable, DI_PRIO);
1314
1315                         if (need_switch_pathgroup(pp->mpp, 0)) {
1316                                 if (pp->mpp->pgfailback > 0)
1317                                         pp->mpp->failback_tick =
1318                                                 pp->mpp->pgfailback + 1;
1319                                 else if (pp->mpp->pgfailback ==
1320                                                 -FAILBACK_IMMEDIATE)
1321                                         switch_pathgroup(pp->mpp);
1322                         }
1323                 }
1324                 defered_failback_tick(vecs->mpvec);
1325                 retry_count_tick(vecs->mpvec);
1326
1327                 if (count)
1328                         count--;
1329                 else {
1330                         condlog(4, "map garbage collection");
1331                         mpvec_garbage_collector(vecs);
1332                         count = MAPGCINT;
1333                 }
1334                 
1335                 lock_cleanup_pop(vecs->lock);
1336                 sleep(1);
1337         }
1338         return NULL;
1339 }
1340
1341 static struct vectors *
1342 init_vecs (void)
1343 {
1344         struct vectors * vecs;
1345
1346         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1347
1348         if (!vecs)
1349                 return NULL;
1350
1351         vecs->lock = 
1352                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1353
1354         if (!vecs->lock)
1355                 goto out;
1356
1357         vecs->pathvec = vector_alloc();
1358
1359         if (!vecs->pathvec)
1360                 goto out1;
1361                 
1362         vecs->mpvec = vector_alloc();
1363
1364         if (!vecs->mpvec)
1365                 goto out2;
1366         
1367         pthread_mutex_init(vecs->lock, NULL);
1368
1369         return vecs;
1370
1371 out2:
1372         vector_free(vecs->pathvec);
1373 out1:
1374         FREE(vecs->lock);
1375 out:
1376         FREE(vecs);
1377         condlog(0, "failed to init paths");
1378         return NULL;
1379 }
1380
/*
 * Install func as the handler for signo, with an empty signal mask and
 * no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when sigaction()
 * fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
        struct sigaction new_act;
        struct sigaction old_act;

        sigemptyset(&new_act.sa_mask);
        new_act.sa_flags = 0;
        new_act.sa_handler = func;

        if (sigaction(signo, &new_act, &old_act) < 0)
                return (SIG_ERR);

        return (old_act.sa_handler);
}
1399
/*
 * SIGHUP handler: logs the event. In _DEBUG_ builds it also calls
 * dbg_free_final(). The signal number is not used.
 */
static void
sighup (int sig)
{
        condlog(3, "SIGHUP received");

#ifdef _DEBUG_
        /* debug builds only */
        dbg_free_final(NULL);
#endif
}
1409
/*
 * Termination signal handler: starts the daemon shutdown with a zero
 * exit status. The signal number is not used.
 */
static void
sigend (int sig)
{
        exit_daemon(0);
}
1415
1416 static void
1417 signal_init(void)
1418 {
1419         signal_set(SIGHUP, sighup);
1420         signal_set(SIGINT, sigend);
1421         signal_set(SIGTERM, sigend);
1422         signal_set(SIGKILL, sigend);
1423 }
1424
/*
 * Ask for the SCHED_RR policy at priority 99 for the calling process.
 * Failure is not fatal; it is only logged.
 */
static void
setscheduler (void)
{
        int res;
        static struct sched_param sched_param = {
                .sched_priority = 99
        };

        /* pid 0 designates the calling process */
        res = sched_setscheduler (0, SCHED_RR, &sched_param);

        if (res == -1)
                condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
        return;
}
1439
/*
 * Write val to /proc/self/oom_adj. Best effort: nothing happens when
 * the file cannot be opened for writing.
 */
static void
set_oom_adj (int val)
{
        FILE *oom_file = fopen("/proc/self/oom_adj", "w");

        if (oom_file == NULL)
                return;

        fprintf(oom_file, "%i", val);
        fclose(oom_file);
}
1453         
1454 static int
1455 child (void * param)
1456 {
1457         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1458         pthread_attr_t attr;
1459         struct vectors * vecs;
1460
1461         mlockall(MCL_CURRENT | MCL_FUTURE);
1462
1463         if (logsink)
1464                 log_thread_start();
1465
1466         condlog(2, "--------start up--------");
1467         condlog(2, "read " DEFAULT_CONFIGFILE);
1468
1469         if (load_config(DEFAULT_CONFIGFILE))
1470                 exit(1);
1471
1472         setlogmask(LOG_UPTO(conf->verbosity + 3));
1473
1474         /*
1475          * fill the voids left in the config file
1476          */
1477         if (!conf->checkint) {
1478                 conf->checkint = CHECKINT;
1479                 conf->max_checkint = MAX_CHECKINT;
1480         }
1481
1482         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1483                 if (logsink)
1484                         log_thread_stop();
1485
1486                 exit(1);
1487         }
1488         signal_init();
1489         setscheduler();
1490         set_oom_adj(-17);
1491         vecs = init_vecs();
1492
1493         if (!vecs)
1494                 exit(1);
1495
1496         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1497                 condlog(0, "can not find sysfs mount point");
1498                 exit(1);
1499         }
1500
1501         /*
1502          * fetch paths and multipaths lists
1503          * no paths and/or no multipaths are valid scenarii
1504          * vectors maintenance will be driven by events
1505          */
1506         path_discovery(vecs->pathvec, conf, DI_SYSFS | DI_WWID | DI_CHECKER);
1507         map_discovery(vecs);
1508
1509         /*
1510          * start threads
1511          */
1512         pthread_attr_init(&attr);
1513         pthread_attr_setstacksize(&attr, 64 * 1024);
1514         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
1515         
1516         pthread_create(&check_thr, &attr, checkerloop, vecs);
1517         pthread_create(&uevent_thr, &attr, ueventloop, vecs);
1518         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, vecs);
1519
1520         pthread_cond_wait(&exit_cond, &exit_mutex);
1521
1522         /*
1523          * exit path
1524          */
1525         lock(vecs->lock);
1526         remove_maps(vecs);
1527         free_pathvec(vecs->pathvec, FREE_PATHS);
1528
1529         pthread_cancel(check_thr);
1530         pthread_cancel(uevent_thr);
1531         pthread_cancel(uxlsnr_thr);
1532
1533         free_keys(keys);
1534         keys = NULL;
1535         free_handlers(handlers);
1536         handlers = NULL;
1537         free_polls();
1538
1539         unlock(vecs->lock);
1540         pthread_mutex_destroy(vecs->lock);
1541         FREE(vecs->lock);
1542         vecs->lock = NULL;
1543         FREE(vecs);
1544         vecs = NULL;
1545         free_config(conf);
1546         conf = NULL;
1547
1548         condlog(2, "--------shut down-------");
1549         
1550         if (logsink)
1551                 log_thread_stop();
1552
1553         dm_lib_release();
1554         dm_lib_exit();
1555
1556 #ifdef _DEBUG_
1557         dbg_free_final(NULL);
1558 #endif
1559
1560         exit(0);
1561 }
1562
1563 static int
1564 daemonize(void)
1565 {
1566         int pid;
1567         int in_fd, out_fd;
1568
1569         if( (pid = fork()) < 0){
1570                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
1571                 return -1;
1572         }
1573         else if (pid != 0)
1574                 return pid;
1575
1576         setsid();
1577
1578         if ( (pid = fork()) < 0)
1579                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
1580         else if (pid != 0)
1581                 _exit(0);
1582
1583         in_fd = open("/dev/null", O_RDONLY);
1584         if (in_fd < 0){
1585                 fprintf(stderr, "cannot open /dev/null for input : %s\n",
1586                         strerror(errno));
1587                 _exit(0);
1588         }
1589         out_fd = open("/dev/console", O_WRONLY);
1590         if (out_fd < 0){
1591                 fprintf(stderr, "cannot open /dev/console for output : %s\n",
1592                         strerror(errno));
1593                 _exit(0);
1594         }
1595
1596         close(STDIN_FILENO);
1597         dup(in_fd);
1598         close(STDOUT_FILENO);
1599         dup(out_fd);
1600         close(STDERR_FILENO);
1601         dup(out_fd);
1602
1603         close(in_fd);
1604         close(out_fd);
1605         chdir("/");
1606         umask(0);
1607         return 0;
1608 }
1609
1610 int
1611 main (int argc, char *argv[])
1612 {
1613         extern char *optarg;
1614         extern int optind;
1615         int arg;
1616         int err;
1617         
1618         logsink = 1;
1619
1620         if (getuid() != 0) {
1621                 fprintf(stderr, "need to be root\n");
1622                 exit(1);
1623         }
1624
1625         /* make sure we don't lock any path */
1626         chdir("/");
1627         umask(umask(077) | 022);
1628
1629         conf = alloc_config();
1630
1631         if (!conf)
1632                 exit(1);
1633
1634         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1635         switch(arg) {
1636                 case 'd':
1637                         logsink = 0;
1638                         //debug=1; /* ### comment me out ### */
1639                         break;
1640                 case 'v':
1641                         if (sizeof(optarg) > sizeof(char *) ||
1642                             !isdigit(optarg[0]))
1643                                 exit(1);
1644
1645                         conf->verbosity = atoi(optarg);
1646                         break;
1647                 case 'k':
1648                         uxclnt(optarg);
1649                         exit(0);
1650                 default:
1651                         ;
1652                 }
1653         }
1654
1655         if (!logsink)
1656                 err = 0;
1657         else
1658                 err = daemonize();
1659         
1660         if (err < 0)
1661                 /* error */
1662                 exit(1);
1663         else if (err > 0)
1664                 /* parent dies */
1665                 exit(0);
1666         else
1667                 /* child lives */
1668                 return (child(NULL));
1669 }