[multipathd] more flexible remove_map()
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15
16 /*
17  * libsysfs
18  */
19 #include <sysfs/libsysfs.h>
20 #include <sysfs/dlist.h>
21
22 /*
23  * libcheckers
24  */
25 #include <checkers.h>
26 #include <path_state.h>
27
28 /*
29  * libmultipath
30  */
31 #include <parser.h>
32 #include <vector.h>
33 #include <memory.h>
34 #include <config.h>
35 #include <callout.h>
36 #include <util.h>
37 #include <blacklist.h>
38 #include <hwtable.h>
39 #include <defaults.h>
40 #include <structs.h>
41 #include <dmparser.h>
42 #include <devmapper.h>
43 #include <dict.h>
44 #include <discovery.h>
45 #include <debug.h>
46 #include <propsel.h>
47 #include <uevent.h>
48 #include <switchgroup.h>
49 #include <path_state.h>
50 #include <print.h>
51
52 #include "main.h"
53 #include "pidfile.h"
54 #include "uxlsnr.h"
55 #include "uxclnt.h"
56 #include "cli.h"
57 #include "cli_handlers.h"
58
59 #define FILE_NAME_SIZE 256
60 #define CMDSIZE 160
61
/*
 * Log the checker message `b` at verbosity `a`, prefixed with the dev_t of
 * the path `pp` (which must be in scope where the macro is expanded), then
 * zero the message buffer so the same text is not reported twice.
 * Nothing is logged when `b` is an empty string.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used safely in an unbraced if/else.
 */
#define LOG_MSG(a,b) \
	do { \
		if (strlen(b)) { \
			condlog(a, "%s: %s", pp->dev_t, b); \
			memset(b, 0, MAX_CHECKER_MSG_SIZE); \
		} \
	} while (0)
67
/*
 * Mutex helpers.
 *
 * lock(a) / unlock(a) take a pthread_mutex_t pointer.
 * lock_cleanup_pop(a) pops (and runs) the cleanup handler pushed with
 * pthread_cleanup_push(); it already carries its own trailing semicolon.
 *
 * With LCKDBG defined every operation is traced on stderr.  The traced
 * lock()/unlock() are wrapped in do { } while (0) so they stay a single
 * statement (e.g. under an unbraced `if`), like the plain variants.
 * lock_cleanup_pop() cannot be wrapped that way because
 * pthread_cleanup_pop() has to pair lexically with pthread_cleanup_push().
 */
#ifdef LCKDBG
#define lock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
		pthread_mutex_lock(a); \
	} while (0)
#define unlock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
		pthread_mutex_unlock(a); \
	} while (0)
#define lock_cleanup_pop(a) \
	fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
	pthread_cleanup_pop(1);
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#define lock_cleanup_pop(a) pthread_cleanup_pop(1);
#endif
83
84 pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER; /* signalled by exit_daemon() */
85 pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER; /* held by exit_daemon() while signalling exit_cond */
86
87 typedef void (stop_waiter_thread_func) (struct multipath *, struct vectors *); /* callback type taken by remove_map() and remove_maps() */
88
89 /*
90  * structs
91  */
92 struct event_thread { /* per-map state of a DM event waiter thread */
93         struct dm_task *dmt; /* wait-event task created in waiteventloop(); destroyed by free_waiter() when set */
94         pthread_t thread; /* filled in by pthread_create() in start_waiter_thread(); target of pthread_kill() in stop_waiter_thread() */
95         int event_nr; /* event number handed to dm_task_set_event_nr(); refreshed from dm_geteventnr() */
96         char mapname[WWID_SIZE]; /* copy of the map alias made in start_waiter_thread() */
97         struct vectors *vecs; /* shared vectors; vecs->lock is taken around update_multipath() */
98 };
99
100 static struct event_thread *
101 alloc_waiter (void)
102 {
103
104         struct event_thread * wp;
105
106         wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
107
108         return wp;
109 }
110
111 static void
112 free_waiter (void * data)
113 {
114         struct event_thread * wp = (struct event_thread *)data;
115
116         if (wp->dmt)
117                 dm_task_destroy(wp->dmt);
118         FREE(wp);
119 }
120
121 static void
122 stop_waiter_thread (struct multipath * mpp, struct vectors * vecs)
123 {
124         struct event_thread * wp = (struct event_thread *)mpp->waiter;
125         
126         if (!wp) {
127                 condlog(3, "%s: no waiter thread", mpp->alias);
128                 return;
129         }
130         condlog(2, "%s: stop event checker thread", wp->mapname);
131         pthread_kill((pthread_t)wp->thread, SIGHUP);
132 }
133
/*
 * pthread cleanup handler: unlock the mutex passed as `data`.
 */
static void
cleanup_lock (void * data)
{
	pthread_mutex_t * mutex = data;

	pthread_mutex_unlock(mutex);
}
139
140 static void
141 adopt_paths (struct vectors * vecs, struct multipath * mpp)
142 {
143         int i;
144         struct path * pp;
145
146         if (!mpp)
147                 return;
148
149         vector_foreach_slot (vecs->pathvec, pp, i) {
150                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
151                         condlog(4, "%s ownership set", pp->dev_t);
152                         pp->mpp = mpp;
153                 }
154         }
155 }
156
157 static void
158 orphan_path (struct path * pp)
159 {
160         pp->mpp = NULL;
161         pp->checkfn = NULL;
162         pp->dmstate = PSTATE_UNDEF;
163         pp->checker_context = NULL;
164         pp->getuid = NULL;
165         pp->getprio = NULL;
166         pp->getprio_selected = 0;
167
168         if (pp->fd >= 0)
169                 close(pp->fd);
170
171         pp->fd = -1;
172 }
173
174 static void
175 orphan_paths (struct vectors * vecs, struct multipath * mpp)
176 {
177         int i;
178         struct path * pp;
179
180         vector_foreach_slot (vecs->pathvec, pp, i) {
181                 if (pp->mpp == mpp) {
182                         condlog(4, "%s is orphaned", pp->dev_t);
183                         orphan_path(pp);
184                 }
185         }
186 }
187
188 static int
189 update_multipath_table (struct multipath *mpp, vector pathvec)
190 {
191         if (!mpp)
192                 return 1;
193
194         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
195                 return 1;
196
197         if (disassemble_map(pathvec, mpp->params, mpp))
198                 return 1;
199
200         return 0;
201 }
202
203 static int
204 update_multipath_status (struct multipath *mpp)
205 {
206         if (!mpp)
207                 return 1;
208
209         if(dm_get_status(mpp->alias, mpp->status))
210                 return 1;
211
212         if (disassemble_status(mpp->status, mpp))
213                 return 1;
214
215         return 0;
216 }
217
218 static int
219 update_multipath_strings (struct multipath *mpp, vector pathvec)
220 {
221         free_multipath_attributes(mpp);
222         free_pgvec(mpp->pg, KEEP_PATHS);
223         mpp->pg = NULL;
224
225         if (update_multipath_table(mpp, pathvec))
226                 return 1;
227
228         if (update_multipath_status(mpp))
229                 return 1;
230
231         return 0;
232 }
233
234 static void
235 set_multipath_wwid (struct multipath * mpp)
236 {
237         if (mpp->wwid)
238                 return;
239
240         dm_get_uuid(mpp->alias, mpp->wwid);
241 }
242
243 /*
244  * mpp->no_path_retry:
245  *   -2 (QUEUE) : queue_if_no_path enabled, never turned off
246  *   -1 (FAIL)  : fail_if_no_path
247  *    0 (UNDEF) : nothing
248  *   >0         : queue_if_no_path enabled, turned off after polling n times
249  */
250 static void
251 update_queue_mode_del_path(struct multipath *mpp)
252 {
253         if (--mpp->nr_active == 0 && mpp->no_path_retry > 0) {
254                 /*
255                  * Enter retry mode.
256                  * meaning of +1: retry_tick may be decremented in
257                  *                checkerloop before starting retry.
258                  */
259                 mpp->retry_tick = mpp->no_path_retry * conf->checkint + 1;
260                 condlog(1, "%s: Entering recovery mode: max_retries=%d",
261                         mpp->alias, mpp->no_path_retry);
262         }
263         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
264 }
265
266 static void
267 update_queue_mode_add_path(struct multipath *mpp)
268 {
269         if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
270                 /* come back to normal mode from retry mode */
271                 mpp->retry_tick = 0;
272                 dm_queue_if_no_path(mpp->alias, 1);
273                 condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
274                 condlog(1, "%s: Recovered to normal mode", mpp->alias);
275         }
276         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
277 }
278
279 static void
280 set_no_path_retry(struct multipath *mpp)
281 {
282         mpp->retry_tick = 0;
283         mpp->nr_active = pathcount(mpp, PATH_UP);
284         select_no_path_retry(mpp);
285
286         switch (mpp->no_path_retry) {
287         case NO_PATH_RETRY_UNDEF:
288                 break;
289         case NO_PATH_RETRY_FAIL:
290                 dm_queue_if_no_path(mpp->alias, 0);
291                 break;
292         case NO_PATH_RETRY_QUEUE:
293                 dm_queue_if_no_path(mpp->alias, 1);
294                 break;
295         default:
296                 dm_queue_if_no_path(mpp->alias, 1);
297                 if (mpp->nr_active == 0) {
298                         /* Enter retry mode */
299                         mpp->retry_tick = mpp->no_path_retry * conf->checkint;
300                         condlog(1, "%s: Entering recovery mode: max_retries=%d",
301                                 mpp->alias, mpp->no_path_retry);
302                 }
303                 break;
304         }
305 }
306
307 static struct hwentry *
308 extract_hwe_from_path(struct multipath * mpp)
309 {
310         struct path * pp;
311         struct pathgroup * pgp;
312
313         pgp = VECTOR_SLOT(mpp->pg, 0);
314         pp = VECTOR_SLOT(pgp->paths, 0);
315
316         return pp->hwe;
317 }
318
319 static void
320 remove_map (struct multipath * mpp, struct vectors * vecs,
321             stop_waiter_thread_func *stop_waiter, int purge_vec)
322 {
323         int i;
324
325         /*
326          * stop the DM event waiter thread
327          */
328         if (stop_waiter)
329                 stop_waiter(mpp, vecs);
330
331         /*
332          * clear references to this map
333          */
334         orphan_paths(vecs, mpp);
335
336         if (purge_vec &&
337             (i = find_slot(vecs->mpvec, (void *)mpp)) != -1)
338                 vector_del_slot(vecs->mpvec, i);
339
340         /*
341          * final free
342          */
343         free_multipath(mpp, KEEP_PATHS);
344 }
345
346 static void
347 remove_maps (struct vectors * vecs,
348              stop_waiter_thread_func *stop_waiter)
349 {
350         int i;
351         struct multipath * mpp;
352
353         vector_foreach_slot (vecs->mpvec, mpp, i) {
354                 remove_map(mpp, vecs, stop_waiter, 1);
355                 i--;
356         }
357
358         vector_free(vecs->mpvec);
359         vecs->mpvec = NULL;
360 }
361
362 static int
363 setup_multipath (struct vectors * vecs, struct multipath * mpp)
364 {
365         if (dm_get_info(mpp->alias, &mpp->dmi))
366                 goto out;
367
368         set_multipath_wwid(mpp);
369         mpp->mpe = find_mpe(mpp->wwid);
370         condlog(4, "discovered map %s", mpp->alias);
371
372         if (update_multipath_strings(mpp, vecs->pathvec))
373                 goto out;
374
375         adopt_paths(vecs, mpp);
376         mpp->hwe = extract_hwe_from_path(mpp);
377         select_pgfailback(mpp);
378         set_no_path_retry(mpp);
379
380         return 0;
381 out:
382         condlog(0, "%s: failed to setup multipath", mpp->alias);
383         remove_map(mpp, vecs, stop_waiter_thread, 1);
384         return 1;
385 }
386
387 static int
388 need_switch_pathgroup (struct multipath * mpp, int refresh)
389 {
390         struct pathgroup * pgp;
391         struct path * pp;
392         int i, j;
393
394         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
395                 return 0;
396
397         /*
398          * Refresh path priority values
399          */
400         if (refresh)
401                 vector_foreach_slot (mpp->pg, pgp, i)
402                         vector_foreach_slot (pgp->paths, pp, j)
403                                 pathinfo(pp, conf->hwtable, DI_PRIO);
404
405         mpp->bestpg = select_path_group(mpp);
406
407         if (mpp->bestpg != mpp->nextpg)
408                 return 1;
409
410         return 0;
411 }
412
413 static void
414 switch_pathgroup (struct multipath * mpp)
415 {
416         dm_switchgroup(mpp->alias, mpp->bestpg);
417         condlog(2, "%s: switch to path group #%i",
418                  mpp->alias, mpp->bestpg);
419 }
420
421 static int
422 update_multipath (struct vectors *vecs, char *mapname)
423 {
424         struct multipath *mpp;
425         struct pathgroup  *pgp;
426         struct path *pp;
427         int i, j;
428         int r = 1;
429
430         mpp = find_mp_by_alias(vecs->mpvec, mapname);
431
432         if (!mpp)
433                 goto out;
434
435         free_pgvec(mpp->pg, KEEP_PATHS);
436         mpp->pg = NULL;
437
438         if (setup_multipath(vecs, mpp))
439                 goto out; /* mpp freed in setup_multipath */
440
441         /*
442          * compare checkers states with DM states
443          */
444         vector_foreach_slot (mpp->pg, pgp, i) {
445                 vector_foreach_slot (pgp->paths, pp, j) {
446                         if (pp->dmstate != PSTATE_FAILED)
447                                 continue;
448
449                         if (pp->state != PATH_DOWN) {
450                                 condlog(2, "%s: mark as failed", pp->dev_t);
451                                 pp->state = PATH_DOWN;
452                                 update_queue_mode_del_path(mpp);
453
454                                 /*
455                                  * if opportune,
456                                  * schedule the next check earlier
457                                  */
458                                 if (pp->tick > conf->checkint)
459                                         pp->tick = conf->checkint;
460                         }
461                 }
462         }
463         r = 0;
464 out:
465         if (r)
466                 condlog(0, "failed to update multipath");
467
468         return r;
469 }
470
/*
 * Unblock SIGHUP for the calling thread.
 * Returns the signal mask that was in effect before the call, so the
 * caller can restore it later.
 */
static sigset_t unblock_sighup(void)
{
	sigset_t hup_only, previous;

	sigemptyset(&hup_only);
	sigaddset(&hup_only, SIGHUP);
	pthread_sigmask(SIG_UNBLOCK, &hup_only, &previous);

	return previous;
}
480
481 /*
482  * returns the reschedule delay
483  * negative means *stop*
484  */
485 static int
486 waiteventloop (struct event_thread * waiter)
487 {
488         sigset_t set;
489         int event_nr;
490         int r;
491
492         if (!waiter->event_nr)
493                 waiter->event_nr = dm_geteventnr(waiter->mapname);
494
495         if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
496                 return 1;
497
498         if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
499                 dm_task_destroy(waiter->dmt);
500                 return 1;
501         }
502
503         if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
504                                                       waiter->event_nr)) {
505                 dm_task_destroy(waiter->dmt);
506                 return 1;
507         }
508
509         dm_task_no_open_count(waiter->dmt);
510         
511         /* accept wait interruption */
512         set = unblock_sighup();
513
514         /* interruption spits messages */
515         dm_shut_log();
516
517         /* wait */
518         r = dm_task_run(waiter->dmt);
519
520         /* wait is over : event or interrupt */
521         pthread_sigmask(SIG_SETMASK, &set, NULL);
522         //dm_restore_log();
523
524         if (!r) /* wait interrupted by signal */
525                 return -1;
526
527         dm_task_destroy(waiter->dmt);
528         waiter->dmt = NULL;
529         waiter->event_nr++;
530
531         /*
532          * upon event ...
533          */
534         while (1) {
535                 condlog(3, "%s: devmap event #%i",
536                                 waiter->mapname, waiter->event_nr);
537
538                 /*
539                  * event might be :
540                  *
541                  * 1) a table reload, which means our mpp structure is
542                  *    obsolete : refresh it through update_multipath()
543                  * 2) a path failed by DM : mark as such through
544                  *    update_multipath()
545                  * 3) map has gone away : stop the thread.
546                  * 4) a path reinstate : nothing to do
547                  * 5) a switch group : nothing to do
548                  */
549                 pthread_cleanup_push(cleanup_lock, waiter->vecs->lock);
550                 lock(waiter->vecs->lock);
551                 r = update_multipath(waiter->vecs, waiter->mapname);
552                 lock_cleanup_pop(waiter->vecs->lock);
553
554                 if (r)
555                         return -1; /* stop the thread */
556
557                 event_nr = dm_geteventnr(waiter->mapname);
558
559                 if (waiter->event_nr == event_nr)
560                         return 1; /* upon problem reschedule 1s later */
561
562                 waiter->event_nr = event_nr;
563         }
564         return -1; /* never reach there */
565 }
566
567 static void *
568 waitevent (void * et)
569 {
570         int r;
571         struct event_thread *waiter;
572
573         mlockall(MCL_CURRENT | MCL_FUTURE);
574
575         waiter = (struct event_thread *)et;
576         pthread_cleanup_push(free_waiter, et);
577
578         while (1) {
579                 r = waiteventloop(waiter);
580
581                 if (r < 0)
582                         break;
583
584                 sleep(r);
585         }
586
587         pthread_cleanup_pop(1);
588         return NULL;
589 }
590
591 static int
592 start_waiter_thread (struct multipath * mpp, struct vectors * vecs)
593 {
594         pthread_attr_t attr;
595         struct event_thread * wp;
596
597         if (!mpp)
598                 return 0;
599
600         if (pthread_attr_init(&attr))
601                 goto out;
602
603         pthread_attr_setstacksize(&attr, 32 * 1024);
604         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
605
606         wp = alloc_waiter();
607
608         if (!wp)
609                 goto out;
610
611         mpp->waiter = (void *)wp;
612         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
613         wp->vecs = vecs;
614
615         if (pthread_create(&wp->thread, &attr, waitevent, wp)) {
616                 condlog(0, "%s: cannot create event checker", wp->mapname);
617                 goto out1;
618         }
619         condlog(2, "%s: event checker started", wp->mapname);
620
621         return 0;
622 out1:
623         free_waiter(wp);
624         mpp->waiter = NULL;
625 out:
626         condlog(0, "failed to start waiter thread");
627         return 1;
628 }
629
630 int
631 uev_add_map (char * devname, struct vectors * vecs)
632 {
633         int major, minor;
634         char dev_t[BLK_DEV_SIZE];
635         char * alias;
636         struct multipath * mpp;
637
638         if (sscanf(devname, "dm-%d", &minor) == 1 &&
639             !sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE) &&
640             sscanf(dev_t, "%d:%d", &major, &minor) == 2)
641                 alias = dm_mapname(major, minor);
642         else
643                 alias = STRDUP(devname);
644                 
645         if (!alias)
646                 return 1;
647         
648         if (!dm_type(alias, DEFAULT_TARGET)) {
649                 condlog(4, "%s: not a multipath map", alias);
650                 FREE(alias);
651                 return 0;
652         }
653
654         mpp = find_mp_by_alias(vecs->mpvec, alias);
655
656         if (mpp) {
657                 /*
658                  * this should not happen,
659                  * we missed a remove map event (not sent ?)
660                  */
661                 condlog(2, "%s: already registered", alias);
662                 remove_map(mpp, vecs, stop_waiter_thread, 1);
663         }
664
665         /*
666          * now we can allocate
667          */
668         mpp = alloc_multipath();
669
670         if (!mpp)
671                 return 1;
672
673         mpp->alias = alias;
674
675         if (setup_multipath(vecs, mpp))
676                 return 1; /* mpp freed in setup_multipath */
677
678         if (!vector_alloc_slot(vecs->mpvec))
679                 goto out;
680
681         vector_set_slot(vecs->mpvec, mpp);
682         adopt_paths(vecs, mpp);
683
684         if (start_waiter_thread(mpp, vecs))
685                 goto out;
686
687         return 0;
688 out:
689         condlog(2, "%s: add devmap failed", mpp->alias);
690         remove_map(mpp, vecs, stop_waiter_thread, 1);
691         return 1;
692 }
693
694 int
695 uev_remove_map (char * devname, struct vectors * vecs)
696 {
697         int minor;
698         struct multipath * mpp;
699
700         if (sscanf(devname, "dm-%d", &minor) == 1)
701                 mpp = find_mp_by_minor(vecs->mpvec, minor);
702         else
703                 mpp = find_mp_by_alias(vecs->mpvec, devname);
704
705         if (!mpp) {
706                 condlog(3, "%s: devmap not registered, can't remove",
707                         devname);
708                 return 0;
709         }
710
711         condlog(2, "remove %s devmap", mpp->alias);
712         remove_map(mpp, vecs, stop_waiter_thread, 1);
713
714         return 0;
715 }
716
717 int
718 uev_add_path (char * devname, struct vectors * vecs)
719 {
720         struct path * pp;
721
722         pp = find_path_by_dev(vecs->pathvec, devname);
723
724         if (pp) {
725                 condlog(3, "%s: already in pathvec");
726                 return 1;
727         }
728         pp = store_pathinfo(vecs->pathvec, conf->hwtable,
729                        devname, DI_SYSFS | DI_WWID);
730
731         if (!pp) {
732                 condlog(0, "%s: failed to store path info", devname);
733                 return 1;
734         }
735
736         condlog(2, "%s: path checker registered", devname);
737         pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
738
739         if (pp->mpp) {
740                 condlog(4, "%s: ownership set to %s",
741                                 pp->dev_t, pp->mpp->alias);
742         } else {
743                 condlog(4, "%s: orphaned", pp->dev_t);
744                 orphan_path(pp);
745         }
746
747         return 0;
748 }
749
750 int
751 uev_remove_path (char * devname, struct vectors * vecs)
752 {
753         int i;
754         struct path * pp;
755
756         pp = find_path_by_dev(vecs->pathvec, devname);
757
758         if (!pp) {
759                 condlog(3, "%s: not in pathvec");
760                 return 1;
761         }
762
763         if (pp->mpp && pp->state == PATH_UP)
764                 update_queue_mode_del_path(pp->mpp);
765
766         condlog(2, "remove %s path checker", devname);
767         i = find_slot(vecs->pathvec, (void *)pp);
768         vector_del_slot(vecs->pathvec, i);
769         free_path(pp);
770
771         return 0;
772 }
773
774 int
775 show_paths (char ** r, int * len, struct vectors * vecs)
776 {
777         int i;
778         struct path * pp;
779         char * c;
780         char * reply;
781         int maxlen = INITIAL_REPLY_LEN;
782         int again = 1;
783
784         get_path_layout(vecs->pathvec);
785         reply = MALLOC(maxlen);
786
787         while (again) {
788                 if (!reply)
789                         return 1;
790
791                 c = reply;
792
793                 if (VECTOR_SIZE(vecs->pathvec) > 0)
794                         c += snprint_path_header(c, reply + maxlen - c,
795                                                  PRINT_PATH_CHECKER);
796
797                 vector_foreach_slot(vecs->pathvec, pp, i)
798                         c += snprint_path(c, reply + maxlen - c,
799                                           PRINT_PATH_CHECKER, pp);
800
801                 again = ((c - reply) == (maxlen - 1));
802
803                 if (again)
804                         reply = REALLOC(reply, maxlen *= 2);
805
806         }
807         *r = reply;
808         *len = (int)(c - reply + 1);
809         return 0;
810 }
811
812 int
813 show_maps (char ** r, int *len, struct vectors * vecs)
814 {
815         int i;
816         struct multipath * mpp;
817         char * c;
818         char * reply;
819         int maxlen = INITIAL_REPLY_LEN;
820         int again = 1;
821
822         get_map_layout(vecs->mpvec);
823         reply = MALLOC(maxlen);
824
825         while (again) {
826                 if (!reply)
827                         return 1;
828
829                 c = reply;
830                 if (VECTOR_SIZE(vecs->mpvec) > 0)
831                         c += snprint_map_header(c, reply + maxlen - c,
832                                                 PRINT_MAP_FAILBACK);
833
834                 vector_foreach_slot(vecs->mpvec, mpp, i)
835                         c += snprint_map(c, reply + maxlen - c,
836                                          PRINT_MAP_FAILBACK, mpp);
837
838                 again = ((c - reply) == (maxlen - 1));
839
840                 if (again)
841                         reply = REALLOC(reply, maxlen *= 2);
842         }
843         *r = reply;
844         *len = (int)(c - reply + 1);
845         return 0;
846 }
847
848 int
849 dump_pathvec (char ** r, int * len, struct vectors * vecs)
850 {
851         int i;
852         struct path * pp;
853         char * reply;
854         char * p;
855
856         *len = VECTOR_SIZE(vecs->pathvec) * sizeof(struct path);
857         reply = (char *)MALLOC(*len);
858         *r = reply;
859
860         if (!reply)
861                 return 1;
862
863         p = reply;
864
865         vector_foreach_slot (vecs->pathvec, pp, i) {
866                 memcpy((void *)p, pp, sizeof(struct path));
867                 p += sizeof(struct path);
868         }
869
870         /* return negative to hint caller not to add "ok" to the dump */
871         return -1;
872 }
873
874 static int
875 map_discovery (struct vectors * vecs)
876 {
877         int i;
878         struct multipath * mpp;
879
880         if (dm_get_maps(vecs->mpvec, "multipath"))
881                 return 1;
882
883         vector_foreach_slot (vecs->mpvec, mpp, i) {
884                 if (setup_multipath(vecs, mpp))
885                         return 1;
886                 start_waiter_thread(mpp, vecs);
887         }
888
889         return 0;
890 }
891
892 int
893 reconfigure (struct vectors * vecs)
894 {
895         struct config * old = conf;
896         struct multipath * mpp;
897         struct path * pp;
898         int i;
899
900         conf = NULL;
901
902         if (load_config(DEFAULT_CONFIGFILE)) {
903                 conf = old;
904                 condlog(2, "reconfigure failed, continue with old config");
905                 return 1;
906         }
907         conf->verbosity = old->verbosity;
908         free_config(old);
909
910         vector_foreach_slot (vecs->mpvec, mpp, i) {
911                 mpp->mpe = find_mpe(mpp->wwid);
912                 mpp->hwe = extract_hwe_from_path(mpp);
913                 adopt_paths(vecs, mpp);
914                 set_no_path_retry(mpp);
915         }
916         vector_foreach_slot (vecs->pathvec, pp, i) {
917                 select_checkfn(pp);
918                 select_getuid(pp);
919                 select_getprio(pp);
920         }
921         condlog(2, "reconfigured");
922         return 0;
923 }
924
925 int
926 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
927 {
928         struct vectors * vecs;
929         int r;
930         
931         *reply = NULL;
932         *len = 0;
933         vecs = (struct vectors *)trigger_data;
934
935         pthread_cleanup_push(cleanup_lock, vecs->lock);
936         lock(vecs->lock);
937
938         r = parse_cmd(str, reply, len, vecs);
939
940         if (r > 0) {
941                 *reply = STRDUP("fail\n");
942                 *len = strlen(*reply) + 1;
943                 r = 1;
944         }
945         else if (!r && *len == 0) {
946                 *reply = STRDUP("ok\n");
947                 *len = strlen(*reply) + 1;
948                 r = 0;
949         }
950         /* else if (r < 0) leave *reply alone */
951
952         lock_cleanup_pop(vecs->lock);
953         return r;
954 }
955
/*
 * Filter uevents by devpath: keep only whole block devices.
 *
 * Returns 1 (discard) when `devpath` is not of the form "/block/<name>"
 * or when it has a further component after "<name>/" (a partition),
 * 0 (keep) otherwise.
 *
 * The scan buffers hold 11 bytes: a %10s / %10[ conversion stores up to
 * 10 characters plus the terminating NUL, which overflowed the former
 * 10-byte arrays by one byte.
 */
static int
uev_discard(char * devpath)
{
	char a[11], b[11];

	/*
	 * keep only block devices, discard partitions
	 */
	if (sscanf(devpath, "/block/%10s", a) != 1 ||
	    sscanf(devpath, "/block/%10[^/]/%10s", a, b) == 2) {
		condlog(4, "discard event on %s", devpath);
		return 1;
	}
	return 0;
}
971
972 int 
973 uev_trigger (struct uevent * uev, void * trigger_data)
974 {
975         int r = 0;
976         char devname[32];
977         struct vectors * vecs;
978
979         vecs = (struct vectors *)trigger_data;
980
981         if (uev_discard(uev->devpath))
982                 return 1;
983
984         basename(uev->devpath, devname);
985         lock(vecs->lock);
986
987         /*
988          * device map add/remove event
989          */
990         if (!strncmp(devname, "dm-", 3)) {
991                 if (!strncmp(uev->action, "add", 3)) {
992                         r = uev_add_map(devname, vecs);
993                         goto out;
994                 }
995 #if 0
996                 if (!strncmp(uev->action, "remove", 6)) {
997                         r = uev_remove_map(devname, vecs);
998                         goto out;
999                 }
1000 #endif
1001                 goto out;
1002         }
1003         
1004         /*
1005          * path add/remove event
1006          */
1007         if (blacklist(conf->blist, devname))
1008                 goto out;
1009
1010         if (!strncmp(uev->action, "add", 3)) {
1011                 r = uev_add_path(devname, vecs);
1012                 goto out;
1013         }
1014         if (!strncmp(uev->action, "remove", 6)) {
1015                 r = uev_remove_path(devname, vecs);
1016                 goto out;
1017         }
1018
1019 out:
1020         unlock(vecs->lock);
1021         return r;
1022 }
1023
1024 static void *
1025 ueventloop (void * ap)
1026 {
1027         if (uevent_listen(&uev_trigger, ap))
1028                 fprintf(stderr, "error starting uevent listener");
1029                 
1030         return NULL;
1031 }
1032
1033 static void *
1034 uxlsnrloop (void * ap)
1035 {
1036         if (load_keys())
1037                 return NULL;
1038         
1039         if (alloc_handlers())
1040                 return NULL;
1041
1042         add_handler(LIST+PATHS, cli_list_paths);
1043         add_handler(LIST+MAPS, cli_list_maps);
1044         add_handler(ADD+PATH, cli_add_path);
1045         add_handler(DEL+PATH, cli_del_path);
1046         add_handler(ADD+MAP, cli_add_map);
1047         add_handler(DEL+MAP, cli_del_map);
1048         add_handler(SWITCH+MAP+GROUP, cli_switch_group);
1049         add_handler(DUMP+PATHVEC, cli_dump_pathvec);
1050         add_handler(RECONFIGURE, cli_reconfigure);
1051         add_handler(SUSPEND+MAP, cli_suspend);
1052         add_handler(RESUME+MAP, cli_resume);
1053         add_handler(REINSTATE+PATH, cli_reinstate);
1054         add_handler(FAIL+PATH, cli_fail);
1055
1056         uxsock_listen(&uxsock_trigger, ap);
1057
1058         return NULL;
1059 }
1060
1061 static int
1062 exit_daemon (int status)
1063 {
1064         if (status != 0)
1065                 fprintf(stderr, "bad exit status. see daemon.log\n");
1066
1067         condlog(3, "unlink pidfile");
1068         unlink(DEFAULT_PIDFILE);
1069
1070         lock(&exit_mutex);
1071         pthread_cond_signal(&exit_cond);
1072         unlock(&exit_mutex);
1073
1074         return status;
1075 }
1076
1077 static void
1078 fail_path (struct path * pp)
1079 {
1080         if (!pp->mpp)
1081                 return;
1082
1083         condlog(2, "checker failed path %s in map %s",
1084                  pp->dev_t, pp->mpp->alias);
1085
1086         dm_fail_path(pp->mpp->alias, pp->dev_t);
1087         update_queue_mode_del_path(pp->mpp);
1088 }
1089
1090 /*
1091  * caller must have locked the path list before calling that function
1092  */
1093 static void
1094 reinstate_path (struct path * pp)
1095 {
1096         if (!pp->mpp)
1097                 return;
1098
1099         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
1100                 condlog(0, "%s: reinstate failed", pp->dev_t);
1101         else {
1102                 condlog(2, "%s: reinstated", pp->dev_t);
1103                 update_queue_mode_add_path(pp->mpp);
1104         }
1105 }
1106
1107 static void
1108 enable_group(struct path * pp)
1109 {
1110         struct pathgroup * pgp;
1111
1112         /*
1113          * if path is added through uev_add_path, pgindex can be unset.
1114          * next update_strings() will set it, upon map reload event.
1115          *
1116          * we can safely return here, because upon map reload, all
1117          * PG will be enabled.
1118          */
1119         if (!pp->mpp->pg || !pp->pgindex)
1120                 return;
1121
1122         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1123         
1124         if (pgp->status == PGSTATE_DISABLED) {
1125                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1126                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1127         }
1128 }
1129
1130 static void
1131 mpvec_garbage_collector (struct vectors * vecs)
1132 {
1133         struct multipath * mpp;
1134         int i;
1135
1136         vector_foreach_slot (vecs->mpvec, mpp, i) {
1137                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1138                         condlog(2, "%s: remove dead map", mpp->alias);
1139                         remove_map(mpp, vecs, stop_waiter_thread, 1);
1140                         i--;
1141                 }
1142         }
1143 }
1144
1145 static void
1146 defered_failback_tick (vector mpvec)
1147 {
1148         struct multipath * mpp;
1149         int i;
1150
1151         vector_foreach_slot (mpvec, mpp, i) {
1152                 /*
1153                  * defered failback getting sooner
1154                  */
1155                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1156                         mpp->failback_tick--;
1157
1158                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1159                                 switch_pathgroup(mpp);
1160                 }
1161         }
1162 }
1163
1164 static void
1165 retry_count_tick(vector mpvec)
1166 {
1167         struct multipath *mpp;
1168         int i;
1169
1170         vector_foreach_slot (mpvec, mpp, i) {
1171                 if (mpp->retry_tick) {
1172                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1173                         if(--mpp->retry_tick == 0) {
1174                                 dm_queue_if_no_path(mpp->alias, 0);
1175                                 condlog(2, "%s: Disable queueing", mpp->alias);
1176                         }
1177                 }
1178         }
1179 }
1180
/*
 * Thread entry point of the path checker.
 *
 * 'ap' is the struct vectors shared with the other threads.  After
 * seeding every known path with the configured check interval, the
 * thread loops forever; each iteration, under vecs->lock:
 *   - runs the checker of every path that is attached to a map and
 *     whose tick countdown has expired,
 *   - fails or reinstates the path in the device-mapper on state change,
 *   - schedules or performs path group failback,
 *   - refreshes the path priority,
 *   - advances the per-map failback and retry countdowns,
 *   - every MAPGCINT iterations, garbage collects dead maps,
 * then sleeps one second.
 *
 * The trailing 'return NULL' is never reached.
 */
static void *
checkerloop (void *ap)
{
        struct vectors *vecs;
        struct path *pp;
        int i, count = 0;       /* count: iterations left before the next map GC */
        int newstate;
        char checker_msg[MAX_CHECKER_MSG_SIZE];

        mlockall(MCL_CURRENT | MCL_FUTURE);

        memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
        vecs = (struct vectors *)ap;

        condlog(2, "path checkers start up");

        /*
         * init the path check interval
         */
        vector_foreach_slot (vecs->pathvec, pp, i) {
                pp->checkint = conf->checkint;
        }

        while (1) {
                /*
                 * cleanup_lock is registered as cleanup handler for
                 * vecs->lock; this push is paired with the
                 * lock_cleanup_pop() at the end of the iteration
                 */
                pthread_cleanup_push(cleanup_lock, vecs->lock);
                lock(vecs->lock);
                condlog(4, "tick");

                vector_foreach_slot (vecs->pathvec, pp, i) {
                        /* paths that belong to no map are not checked */
                        if (!pp->mpp)
                                continue;

                        /* count the tick down; only proceed once it hits zero */
                        if (pp->tick && --pp->tick)
                                continue; /* don't check this path yet */

                        /*
                         * provision a next check soonest,
                         * in case we exit abnormally from here
                         */
                        pp->tick = conf->checkint;
                        
                        /* no checker selected yet: refresh sysfs info and pick one */
                        if (!pp->checkfn) {
                                pathinfo(pp, conf->hwtable, DI_SYSFS);
                                select_checkfn(pp);
                        }

                        if (!pp->checkfn) {
                                condlog(0, "%s: checkfn is void", pp->dev);
                                continue;
                        }
                        newstate = pp->checkfn(pp->fd, checker_msg,
                                               &pp->checker_context);
                        
                        /* a negative checker result is treated as "unusable path" */
                        if (newstate < 0) {
                                condlog(2, "%s: unusable path", pp->dev);
                                pathinfo(pp, conf->hwtable, 0);
                                continue;
                        }

                        if (newstate != pp->state) {
                                pp->state = newstate;
                                /* LOG_MSG also clears checker_msg after logging it */
                                LOG_MSG(1, checker_msg);

                                /*
                                 * upon state change, reset the checkint
                                 * to the shortest delay
                                 */
                                pp->checkint = conf->checkint;

                                /*
                                 * update_multipath_strings() is only
                                 * evaluated when the new state is neither
                                 * PATH_DOWN nor PATH_SHAKY; a non-zero
                                 * return from it also leads to failing
                                 * the path
                                 */
                                if (newstate == PATH_DOWN ||
                                    newstate == PATH_SHAKY ||
                                    update_multipath_strings(pp->mpp,
                                                             vecs->pathvec)) {
                                        /*
                                         * proactively fail path in the DM
                                         */
                                        fail_path(pp);

                                        /*
                                         * cancel scheduled failback
                                         */
                                        pp->mpp->failback_tick = 0;

                                        /* note: skips the prio refresh below */
                                        continue;
                                }

                                /*
                                 * reinstate this path
                                 */
                                reinstate_path(pp);

                                /*
                                 * schedule [deferred] failback
                                 */
                                if (pp->mpp->pgfailback > 0)
                                        pp->mpp->failback_tick =
                                                pp->mpp->pgfailback + 1;
                                else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
                                    need_switch_pathgroup(pp->mpp, 1))
                                        switch_pathgroup(pp->mpp);

                                /*
                                 * if at least one path is up in a group, and
                                 * the group is disabled, re-enable it
                                 */
                                if (newstate == PATH_UP)
                                        enable_group(pp);
                        }
                        else if (newstate == PATH_UP || newstate == PATH_GHOST) {
                                LOG_MSG(4, checker_msg);
                                /*
                                 * double the next check delay.
                                 * max at conf->max_checkint
                                 */
                                if (pp->checkint < (conf->max_checkint / 2))
                                        pp->checkint = 2 * pp->checkint;
                                else
                                        pp->checkint = conf->max_checkint;

                                pp->tick = pp->checkint;
                                condlog(4, "%s: delay next check %is",
                                                pp->dev_t, pp->tick);

                        }
                        /* already equal to newstate on both branches above */
                        pp->state = newstate;

                        /*
                         * path prio refreshing
                         */
                        condlog(4, "path prio refresh");
                        pathinfo(pp, conf->hwtable, DI_PRIO);

                        /*
                         * the refreshed priorities may call for another
                         * path group: schedule or perform the failback
                         */
                        if (need_switch_pathgroup(pp->mpp, 0)) {
                                if (pp->mpp->pgfailback > 0)
                                        pp->mpp->failback_tick =
                                                pp->mpp->pgfailback + 1;
                                else if (pp->mpp->pgfailback ==
                                                -FAILBACK_IMMEDIATE)
                                        switch_pathgroup(pp->mpp);
                        }
                }
                /* per-map countdowns advance once per iteration */
                defered_failback_tick(vecs->mpvec);
                retry_count_tick(vecs->mpvec);

                /* run the map garbage collector every MAPGCINT iterations */
                if (count)
                        count--;
                else {
                        condlog(4, "map garbage collection");
                        mpvec_garbage_collector(vecs);
                        count = MAPGCINT;
                }
                
                lock_cleanup_pop(vecs->lock);
                sleep(1);
        }
        return NULL;
}
1338
1339 static struct vectors *
1340 init_vecs (void)
1341 {
1342         struct vectors * vecs;
1343
1344         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1345
1346         if (!vecs)
1347                 return NULL;
1348
1349         vecs->lock = 
1350                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1351
1352         if (!vecs->lock)
1353                 goto out;
1354
1355         vecs->pathvec = vector_alloc();
1356
1357         if (!vecs->pathvec)
1358                 goto out1;
1359                 
1360         vecs->mpvec = vector_alloc();
1361
1362         if (!vecs->mpvec)
1363                 goto out2;
1364         
1365         pthread_mutex_init(vecs->lock, NULL);
1366
1367         return vecs;
1368
1369 out2:
1370         vector_free(vecs->pathvec);
1371 out1:
1372         FREE(vecs->lock);
1373 out:
1374         FREE(vecs);
1375         condlog(0, "failed to init paths");
1376         return NULL;
1377 }
1378
/*
 * Install 'func' as handler of signal 'signo', with an empty signal
 * mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when sigaction()
 * fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
        struct sigaction act;
        struct sigaction oldact;

        act.sa_handler = func;
        sigemptyset(&act.sa_mask);
        act.sa_flags = 0;

        if (sigaction(signo, &act, &oldact) < 0)
                return (SIG_ERR);

        return (oldact.sa_handler);
}
1397
/*
 * SIGHUP handler: logs the reception and, in _DEBUG_ builds, calls
 * dbg_free_final(NULL).  The signal number is not used.
 */
static void
sighup (int sig)
{
        (void)sig;

        condlog(3, "SIGHUP received");

#ifdef _DEBUG_
        dbg_free_final(NULL);
#endif
}
1407
/*
 * Termination signal handler: starts the daemon shutdown with a zero
 * status.  The signal number is not used.
 */
static void
sigend (int sig)
{
        (void)sig;
        (void)exit_daemon(0);
}
1413
1414 static void
1415 signal_init(void)
1416 {
1417         signal_set(SIGHUP, sighup);
1418         signal_set(SIGINT, sigend);
1419         signal_set(SIGTERM, sigend);
1420         signal_set(SIGKILL, sigend);
1421 }
1422
/*
 * Ask for the SCHED_RR scheduling policy at priority 99 for the calling
 * process.  A failure is only logged.
 */
static void
setscheduler (void)
{
        static struct sched_param sched_param = {
                .sched_priority = 99
        };

        if (sched_setscheduler(0, SCHED_RR, &sched_param) == -1)
                condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1437
/*
 * Write 'val' to /proc/self/oom_adj.
 * Best effort: nothing happens when the file cannot be opened.
 */
static void
set_oom_adj (int val)
{
        FILE *adj = fopen("/proc/self/oom_adj", "w");

        if (adj) {
                fprintf(adj, "%i", val);
                fclose(adj);
        }
}
1451         
1452 static int
1453 child (void * param)
1454 {
1455         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1456         pthread_attr_t attr;
1457         struct vectors * vecs;
1458
1459         mlockall(MCL_CURRENT | MCL_FUTURE);
1460
1461         if (logsink)
1462                 log_thread_start();
1463
1464         condlog(2, "--------start up--------");
1465         condlog(2, "read " DEFAULT_CONFIGFILE);
1466
1467         if (load_config(DEFAULT_CONFIGFILE))
1468                 exit(1);
1469
1470         setlogmask(LOG_UPTO(conf->verbosity + 3));
1471
1472         /*
1473          * fill the voids left in the config file
1474          */
1475         if (!conf->checkint) {
1476                 conf->checkint = CHECKINT;
1477                 conf->max_checkint = MAX_CHECKINT;
1478         }
1479
1480         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1481                 if (logsink)
1482                         log_thread_stop();
1483
1484                 exit(1);
1485         }
1486         signal_init();
1487         setscheduler();
1488         set_oom_adj(-17);
1489         vecs = init_vecs();
1490
1491         if (!vecs)
1492                 exit(1);
1493
1494         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1495                 condlog(0, "can not find sysfs mount point");
1496                 exit(1);
1497         }
1498
1499         /*
1500          * fetch paths and multipaths lists
1501          * no paths and/or no multipaths are valid scenarii
1502          * vectors maintenance will be driven by events
1503          */
1504         path_discovery(vecs->pathvec, conf, DI_SYSFS | DI_WWID | DI_CHECKER);
1505         map_discovery(vecs);
1506
1507         /*
1508          * start threads
1509          */
1510         pthread_attr_init(&attr);
1511         pthread_attr_setstacksize(&attr, 64 * 1024);
1512         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
1513         
1514         pthread_create(&check_thr, &attr, checkerloop, vecs);
1515         pthread_create(&uevent_thr, &attr, ueventloop, vecs);
1516         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, vecs);
1517
1518         pthread_cond_wait(&exit_cond, &exit_mutex);
1519
1520         /*
1521          * exit path
1522          */
1523         lock(vecs->lock);
1524         remove_maps(vecs, stop_waiter_thread);
1525         free_pathvec(vecs->pathvec, FREE_PATHS);
1526
1527         pthread_cancel(check_thr);
1528         pthread_cancel(uevent_thr);
1529         pthread_cancel(uxlsnr_thr);
1530
1531         free_keys(keys);
1532         keys = NULL;
1533         free_handlers(handlers);
1534         handlers = NULL;
1535         free_polls();
1536
1537         unlock(vecs->lock);
1538         pthread_mutex_destroy(vecs->lock);
1539         FREE(vecs->lock);
1540         vecs->lock = NULL;
1541         FREE(vecs);
1542         vecs = NULL;
1543         free_config(conf);
1544         conf = NULL;
1545
1546         condlog(2, "--------shut down-------");
1547         
1548         if (logsink)
1549                 log_thread_stop();
1550
1551         dm_lib_release();
1552         dm_lib_exit();
1553
1554 #ifdef _DEBUG_
1555         dbg_free_final(NULL);
1556 #endif
1557
1558         exit(0);
1559 }
1560
/*
 * Detach from the controlling terminal with the usual double fork.
 *
 * Returns:
 *   -1   when the first fork fails (in the original process),
 *   > 0  the pid of the first child (in the original process),
 *   0    in the surviving daemon process.
 *
 * In the daemon, stdin is redirected to /dev/null, stdout and stderr to
 * /dev/console, the working directory becomes "/" and the umask is
 * cleared.  If one of the two devices cannot be opened, or a redirection
 * fails, the daemon process terminates with a non-zero status.
 */
static int
daemonize(void)
{
        int pid;
        int in_fd, out_fd;

        if( (pid = fork()) < 0){
                fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
                return -1;
        }
        else if (pid != 0)
                return pid;

        setsid();

        /*
         * a failing second fork is only reported: the first child then
         * carries on as the daemon, still being the session leader
         */
        if ( (pid = fork()) < 0)
                fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
        else if (pid != 0)
                _exit(0);

        in_fd = open("/dev/null", O_RDONLY);
        if (in_fd < 0){
                fprintf(stderr, "cannot open /dev/null for input : %s\n",
                        strerror(errno));
                _exit(1);
        }
        out_fd = open("/dev/console", O_WRONLY);
        if (out_fd < 0){
                fprintf(stderr, "cannot open /dev/console for output : %s\n",
                        strerror(errno));
                _exit(1);
        }

        /*
         * dup2() replaces each standard descriptor in one step, and
         * is a no-op when source and target are the same descriptor.
         * That matters when the daemon was started with some of the
         * descriptors 0-2 closed, so that open() handed one of them out.
         */
        if (dup2(in_fd, STDIN_FILENO) < 0 ||
            dup2(out_fd, STDOUT_FILENO) < 0 ||
            dup2(out_fd, STDERR_FILENO) < 0) {
                fprintf(stderr, "cannot redirect standard streams : %s\n",
                        strerror(errno));
                _exit(1);
        }

        /* never close a descriptor that now serves as a standard stream */
        if (in_fd > STDERR_FILENO)
                close(in_fd);
        if (out_fd > STDERR_FILENO)
                close(out_fd);

        if (chdir("/") < 0)
                fprintf(stderr, "cannot chdir to / : %s\n", strerror(errno));

        umask(0);
        return 0;
}
1607
1608 int
1609 main (int argc, char *argv[])
1610 {
1611         extern char *optarg;
1612         extern int optind;
1613         int arg;
1614         int err;
1615         
1616         logsink = 1;
1617
1618         if (getuid() != 0) {
1619                 fprintf(stderr, "need to be root\n");
1620                 exit(1);
1621         }
1622
1623         /* make sure we don't lock any path */
1624         chdir("/");
1625         umask(umask(077) | 022);
1626
1627         conf = alloc_config();
1628
1629         if (!conf)
1630                 exit(1);
1631
1632         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1633         switch(arg) {
1634                 case 'd':
1635                         logsink = 0;
1636                         //debug=1; /* ### comment me out ### */
1637                         break;
1638                 case 'v':
1639                         if (sizeof(optarg) > sizeof(char *) ||
1640                             !isdigit(optarg[0]))
1641                                 exit(1);
1642
1643                         conf->verbosity = atoi(optarg);
1644                         break;
1645                 case 'k':
1646                         uxclnt(optarg);
1647                         exit(0);
1648                 default:
1649                         ;
1650                 }
1651         }
1652
1653         if (!logsink)
1654                 err = 0;
1655         else
1656                 err = daemonize();
1657         
1658         if (err < 0)
1659                 /* error */
1660                 exit(1);
1661         else if (err > 0)
1662                 /* parent dies */
1663                 exit(0);
1664         else
1665                 /* child lives */
1666                 return (child(NULL));
1667 }