[libmultipath] split free_multipath_attributes() out of free_multipath()
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15
16 /*
17  * libsysfs
18  */
19 #include <sysfs/libsysfs.h>
20 #include <sysfs/dlist.h>
21
22 /*
23  * libcheckers
24  */
25 #include <checkers.h>
26 #include <path_state.h>
27
28 /*
29  * libmultipath
30  */
31 #include <parser.h>
32 #include <vector.h>
33 #include <memory.h>
34 #include <config.h>
35 #include <callout.h>
36 #include <util.h>
37 #include <blacklist.h>
38 #include <hwtable.h>
39 #include <defaults.h>
40 #include <structs.h>
41 #include <dmparser.h>
42 #include <devmapper.h>
43 #include <dict.h>
44 #include <discovery.h>
45 #include <debug.h>
46 #include <propsel.h>
47 #include <uevent.h>
48 #include <switchgroup.h>
49 #include <path_state.h>
50 #include <print.h>
51
52 #include "main.h"
53 #include "pidfile.h"
54 #include "uxlsnr.h"
55 #include "uxclnt.h"
56 #include "cli.h"
57 #include "cli_handlers.h"
58
59 #define FILE_NAME_SIZE 256
60 #define CMDSIZE 160
61
/*
 * Log the checker message held in buffer "b" at verbosity "a", prefixed
 * with the dev_t of the path "pp" (which must be in scope where the macro
 * is expanded), then zero MAX_CHECKER_MSG_SIZE bytes of the buffer.
 * An empty buffer is left alone and nothing is logged.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement and behaves correctly inside an unbraced if/else.
 */
#define LOG_MSG(a,b) \
	do { \
		if (strlen(b)) { \
			condlog(a, "%s: %s", pp->dev_t, b); \
			memset(b, 0, MAX_CHECKER_MSG_SIZE); \
		} \
	} while (0)
67
/*
 * Mutex helpers. When LCKDBG is defined every lock/unlock also prints its
 * call site and the mutex address on stderr.
 *
 * lock() and unlock() expand to exactly one statement in both variants,
 * so they can be used as the body of an unbraced if/else.
 *
 * lock_cleanup_pop() has to stay a bare statement sequence: it ends with
 * pthread_cleanup_pop(1), which must sit in the same lexical scope as the
 * matching pthread_cleanup_push(). It carries its own trailing semicolon.
 */
#ifdef LCKDBG
#define lock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
		pthread_mutex_lock(a); \
	} while (0)
#define unlock(a) \
	do { \
		fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
		pthread_mutex_unlock(a); \
	} while (0)
#define lock_cleanup_pop(a) \
	fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
	pthread_cleanup_pop(1);
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#define lock_cleanup_pop(a) pthread_cleanup_pop(1);
#endif
83
/*
 * Condition variable and its companion mutex: exit_daemon() signals
 * exit_cond while holding exit_mutex.
 */
pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
86
87 /*
88  * structs
89  */
/*
 * Per-map state of a device-mapper event waiter thread.
 */
struct event_thread {
	/* pending DM_DEVICE_WAITEVENT task; destroyed by free_waiter() when set */
	struct dm_task *dmt;
	/* thread id filled in by pthread_create(); target of the SIGHUP stop signal */
	pthread_t thread;
	/* event number the next wait is armed with */
	int event_nr;
	/* copy of the map alias the thread waits on */
	char mapname[WWID_SIZE];
	/* shared vectors; vecs->lock is held while the map is refreshed */
	struct vectors *vecs;
};
97
98 static struct event_thread *
99 alloc_waiter (void)
100 {
101
102         struct event_thread * wp;
103
104         wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
105
106         return wp;
107 }
108
109 static void
110 free_waiter (void * data)
111 {
112         struct event_thread * wp = (struct event_thread *)data;
113
114         if (wp->dmt)
115                 dm_task_destroy(wp->dmt);
116         FREE(wp);
117 }
118
119 static void
120 stop_waiter_thread (struct multipath * mpp, struct vectors * vecs)
121 {
122         struct event_thread * wp = (struct event_thread *)mpp->waiter;
123         pthread_t thread;
124         
125         if (!wp) {
126                 condlog(3, "%s: no waiter thread", mpp->alias);
127                 return;
128         }
129         thread = wp->thread;
130
131         if (!wp) {
132                 condlog(3, "%s: thread not started", mpp->alias);
133                 return;
134         }
135         condlog(2, "%s: stop event checker thread", wp->mapname);
136         pthread_kill(thread, SIGHUP);
137 }
138
/*
 * pthread cleanup handler: unlock the mutex passed as opaque data.
 */
static void
cleanup_lock (void * data)
{
	pthread_mutex_t * mutex = (pthread_mutex_t *)data;

	pthread_mutex_unlock(mutex);
}
144
145 static void
146 adopt_paths (struct vectors * vecs, struct multipath * mpp)
147 {
148         int i;
149         struct path * pp;
150
151         if (!mpp)
152                 return;
153
154         vector_foreach_slot (vecs->pathvec, pp, i) {
155                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
156                         condlog(4, "%s ownership set", pp->dev_t);
157                         pp->mpp = mpp;
158                 }
159         }
160 }
161
162 static void
163 orphan_path (struct path * pp)
164 {
165         pp->mpp = NULL;
166         pp->checkfn = NULL;
167         pp->dmstate = PSTATE_UNDEF;
168         pp->checker_context = NULL;
169         pp->getuid = NULL;
170         pp->getprio = NULL;
171         pp->getprio_selected = 0;
172
173         if (pp->fd >= 0)
174                 close(pp->fd);
175
176         pp->fd = -1;
177 }
178
179 static void
180 orphan_paths (struct vectors * vecs, struct multipath * mpp)
181 {
182         int i;
183         struct path * pp;
184
185         vector_foreach_slot (vecs->pathvec, pp, i) {
186                 if (pp->mpp == mpp) {
187                         condlog(4, "%s is orphaned", pp->dev_t);
188                         orphan_path(pp);
189                 }
190         }
191 }
192
193 static int
194 update_multipath_table (struct multipath *mpp, vector pathvec)
195 {
196         if (!mpp)
197                 return 1;
198
199         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
200                 return 1;
201
202         if (disassemble_map(pathvec, mpp->params, mpp))
203                 return 1;
204
205         return 0;
206 }
207
208 static int
209 update_multipath_status (struct multipath *mpp)
210 {
211         if (!mpp)
212                 return 1;
213
214         if(dm_get_status(mpp->alias, mpp->status))
215                 return 1;
216
217         if (disassemble_status(mpp->status, mpp))
218                 return 1;
219
220         return 0;
221 }
222
223 static int
224 update_multipath_strings (struct multipath *mpp, vector pathvec)
225 {
226         free_multipath_attributes(mpp);
227         free_pgvec(mpp->pg, KEEP_PATHS);
228         mpp->pg = NULL;
229
230         if (update_multipath_table(mpp, pathvec))
231                 return 1;
232
233         if (update_multipath_status(mpp))
234                 return 1;
235
236         return 0;
237 }
238
239 static void
240 set_multipath_wwid (struct multipath * mpp)
241 {
242         if (mpp->wwid)
243                 return;
244
245         dm_get_uuid(mpp->alias, mpp->wwid);
246 }
247
248 /*
249  * mpp->no_path_retry:
250  *   -2 (QUEUE) : queue_if_no_path enabled, never turned off
251  *   -1 (FAIL)  : fail_if_no_path
252  *    0 (UNDEF) : nothing
253  *   >0         : queue_if_no_path enabled, turned off after polling n times
254  */
255 static void
256 update_queue_mode_del_path(struct multipath *mpp)
257 {
258         if (--mpp->nr_active == 0 && mpp->no_path_retry > 0) {
259                 /*
260                  * Enter retry mode.
261                  * meaning of +1: retry_tick may be decremented in
262                  *                checkerloop before starting retry.
263                  */
264                 mpp->retry_tick = mpp->no_path_retry * conf->checkint + 1;
265                 condlog(1, "%s: Entering recovery mode: max_retries=%d",
266                         mpp->alias, mpp->no_path_retry);
267         }
268         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
269 }
270
271 static void
272 update_queue_mode_add_path(struct multipath *mpp)
273 {
274         if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
275                 /* come back to normal mode from retry mode */
276                 mpp->retry_tick = 0;
277                 dm_queue_if_no_path(mpp->alias, 1);
278                 condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
279                 condlog(1, "%s: Recovered to normal mode", mpp->alias);
280         }
281         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
282 }
283
284 static void
285 set_no_path_retry(struct multipath *mpp)
286 {
287         mpp->retry_tick = 0;
288         mpp->nr_active = pathcount(mpp, PATH_UP);
289         select_no_path_retry(mpp);
290
291         switch (mpp->no_path_retry) {
292         case NO_PATH_RETRY_UNDEF:
293                 break;
294         case NO_PATH_RETRY_FAIL:
295                 dm_queue_if_no_path(mpp->alias, 0);
296                 break;
297         case NO_PATH_RETRY_QUEUE:
298                 dm_queue_if_no_path(mpp->alias, 1);
299                 break;
300         default:
301                 dm_queue_if_no_path(mpp->alias, 1);
302                 if (mpp->nr_active == 0) {
303                         /* Enter retry mode */
304                         mpp->retry_tick = mpp->no_path_retry * conf->checkint;
305                         condlog(1, "%s: Entering recovery mode: max_retries=%d",
306                                 mpp->alias, mpp->no_path_retry);
307                 }
308                 break;
309         }
310 }
311
312 static struct hwentry *
313 extract_hwe_from_path(struct multipath * mpp)
314 {
315         struct path * pp;
316         struct pathgroup * pgp;
317
318         pgp = VECTOR_SLOT(mpp->pg, 0);
319         pp = VECTOR_SLOT(pgp->paths, 0);
320
321         return pp->hwe;
322 }
323
324 static void
325 remove_map (struct multipath * mpp, struct vectors * vecs)
326 {
327         int i;
328
329         stop_waiter_thread(mpp, vecs);
330
331         /*
332          * clear references to this map
333          */
334         orphan_paths(vecs, mpp);
335
336         /*
337          * purge the multipath vector
338          */
339         i = find_slot(vecs->mpvec, (void *)mpp);
340         vector_del_slot(vecs->mpvec, i);
341
342         /*
343          * final free
344          */
345         free_multipath(mpp, KEEP_PATHS);
346         mpp = NULL;
347 }
348
349 static void
350 remove_maps (struct vectors * vecs)
351 {
352         int i;
353         struct multipath * mpp;
354
355         vector_foreach_slot (vecs->mpvec, mpp, i) {
356                 remove_map(mpp, vecs);
357                 i--;
358         }
359
360         vector_free(vecs->mpvec);
361         vecs->mpvec = NULL;
362 }
363
364 static int
365 setup_multipath (struct vectors * vecs, struct multipath * mpp)
366 {
367         if (dm_get_info(mpp->alias, &mpp->dmi))
368                 goto out;
369
370         set_multipath_wwid(mpp);
371         mpp->mpe = find_mpe(mpp->wwid);
372         condlog(4, "discovered map %s", mpp->alias);
373
374         if (update_multipath_strings(mpp, vecs->pathvec))
375                 goto out;
376
377         adopt_paths(vecs, mpp);
378         mpp->hwe = extract_hwe_from_path(mpp);
379         select_pgfailback(mpp);
380         set_no_path_retry(mpp);
381
382         return 0;
383 out:
384         condlog(0, "%s: failed to setup multipath", mpp->alias);
385         remove_map(mpp, vecs);
386         return 1;
387 }
388
389 static int
390 need_switch_pathgroup (struct multipath * mpp, int refresh)
391 {
392         struct pathgroup * pgp;
393         struct path * pp;
394         int i, j;
395
396         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
397                 return 0;
398
399         /*
400          * Refresh path priority values
401          */
402         if (refresh)
403                 vector_foreach_slot (mpp->pg, pgp, i)
404                         vector_foreach_slot (pgp->paths, pp, j)
405                                 pathinfo(pp, conf->hwtable, DI_PRIO);
406
407         mpp->bestpg = select_path_group(mpp);
408
409         if (mpp->bestpg != mpp->nextpg)
410                 return 1;
411
412         return 0;
413 }
414
415 static void
416 switch_pathgroup (struct multipath * mpp)
417 {
418         dm_switchgroup(mpp->alias, mpp->bestpg);
419         condlog(2, "%s: switch to path group #%i",
420                  mpp->alias, mpp->bestpg);
421 }
422
423 static int
424 update_multipath (struct vectors *vecs, char *mapname)
425 {
426         struct multipath *mpp;
427         struct pathgroup  *pgp;
428         struct path *pp;
429         int i, j;
430         int r = 1;
431
432         mpp = find_mp_by_alias(vecs->mpvec, mapname);
433
434         if (!mpp)
435                 goto out;
436
437         free_pgvec(mpp->pg, KEEP_PATHS);
438         mpp->pg = NULL;
439
440         if (setup_multipath(vecs, mpp))
441                 goto out; /* mpp freed in setup_multipath */
442
443         /*
444          * compare checkers states with DM states
445          */
446         vector_foreach_slot (mpp->pg, pgp, i) {
447                 vector_foreach_slot (pgp->paths, pp, j) {
448                         if (pp->dmstate != PSTATE_FAILED)
449                                 continue;
450
451                         if (pp->state != PATH_DOWN) {
452                                 condlog(2, "%s: mark as failed", pp->dev_t);
453                                 pp->state = PATH_DOWN;
454                                 update_queue_mode_del_path(mpp);
455
456                                 /*
457                                  * if opportune,
458                                  * schedule the next check earlier
459                                  */
460                                 if (pp->tick > conf->checkint)
461                                         pp->tick = conf->checkint;
462                         }
463                 }
464         }
465         r = 0;
466 out:
467         if (r)
468                 condlog(0, "failed to update multipath");
469
470         return r;
471 }
472
/*
 * Unblock SIGHUP for the calling thread.
 * Returns the signal mask that was in force before the call.
 */
static sigset_t unblock_sighup(void)
{
	sigset_t hup_only;
	sigset_t previous;

	sigemptyset(&hup_only);
	sigaddset(&hup_only, SIGHUP);

	pthread_sigmask(SIG_UNBLOCK, &hup_only, &previous);

	return previous;
}
482
483 /*
484  * returns the reschedule delay
485  * negative means *stop*
486  */
487 static int
488 waiteventloop (struct event_thread * waiter)
489 {
490         sigset_t set;
491         int event_nr;
492         int r;
493
494         if (!waiter->event_nr)
495                 waiter->event_nr = dm_geteventnr(waiter->mapname);
496
497         if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
498                 return 1;
499
500         if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
501                 dm_task_destroy(waiter->dmt);
502                 return 1;
503         }
504
505         if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
506                                                       waiter->event_nr)) {
507                 dm_task_destroy(waiter->dmt);
508                 return 1;
509         }
510
511         dm_task_no_open_count(waiter->dmt);
512         
513         /* accept wait interruption */
514         set = unblock_sighup();
515
516         /* interruption spits messages */
517         dm_shut_log();
518
519         /* wait */
520         r = dm_task_run(waiter->dmt);
521
522         /* wait is over : event or interrupt */
523         pthread_sigmask(SIG_SETMASK, &set, NULL);
524         //dm_restore_log();
525
526         if (!r) /* wait interrupted by signal */
527                 return -1;
528
529         dm_task_destroy(waiter->dmt);
530         waiter->dmt = NULL;
531         waiter->event_nr++;
532
533         /*
534          * upon event ...
535          */
536         while (1) {
537                 condlog(3, "%s: devmap event #%i",
538                                 waiter->mapname, waiter->event_nr);
539
540                 /*
541                  * event might be :
542                  *
543                  * 1) a table reload, which means our mpp structure is
544                  *    obsolete : refresh it through update_multipath()
545                  * 2) a path failed by DM : mark as such through
546                  *    update_multipath()
547                  * 3) map has gone away : stop the thread.
548                  * 4) a path reinstate : nothing to do
549                  * 5) a switch group : nothing to do
550                  */
551                 pthread_cleanup_push(cleanup_lock, waiter->vecs->lock);
552                 lock(waiter->vecs->lock);
553                 r = update_multipath(waiter->vecs, waiter->mapname);
554                 lock_cleanup_pop(waiter->vecs->lock);
555
556                 if (r)
557                         return -1; /* stop the thread */
558
559                 event_nr = dm_geteventnr(waiter->mapname);
560
561                 if (waiter->event_nr == event_nr)
562                         return 1; /* upon problem reschedule 1s later */
563
564                 waiter->event_nr = event_nr;
565         }
566         return -1; /* never reach there */
567 }
568
569 static void *
570 waitevent (void * et)
571 {
572         int r;
573         struct event_thread *waiter;
574
575         mlockall(MCL_CURRENT | MCL_FUTURE);
576
577         waiter = (struct event_thread *)et;
578         pthread_cleanup_push(free_waiter, et);
579
580         while (1) {
581                 r = waiteventloop(waiter);
582
583                 if (r < 0)
584                         break;
585
586                 sleep(r);
587         }
588
589         pthread_cleanup_pop(1);
590         return NULL;
591 }
592
593 static int
594 start_waiter_thread (struct multipath * mpp, struct vectors * vecs)
595 {
596         pthread_attr_t attr;
597         struct event_thread * wp;
598
599         if (!mpp)
600                 return 0;
601
602         if (pthread_attr_init(&attr))
603                 goto out;
604
605         pthread_attr_setstacksize(&attr, 32 * 1024);
606         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
607
608         wp = alloc_waiter();
609
610         if (!wp)
611                 goto out;
612
613         mpp->waiter = (void *)wp;
614         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
615         wp->vecs = vecs;
616
617         if (pthread_create(&wp->thread, &attr, waitevent, wp)) {
618                 condlog(0, "%s: cannot create event checker", wp->mapname);
619                 goto out1;
620         }
621         condlog(2, "%s: event checker started", wp->mapname);
622
623         return 0;
624 out1:
625         free_waiter(wp);
626         mpp->waiter = NULL;
627 out:
628         condlog(0, "failed to start waiter thread");
629         return 1;
630 }
631
632 int
633 uev_add_map (char * devname, struct vectors * vecs)
634 {
635         int major, minor;
636         char dev_t[BLK_DEV_SIZE];
637         char * alias;
638         struct multipath * mpp;
639
640         if (sscanf(devname, "dm-%d", &minor) == 1 &&
641             !sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE) &&
642             sscanf(dev_t, "%d:%d", &major, &minor) == 2)
643                 alias = dm_mapname(major, minor);
644         else
645                 alias = STRDUP(devname);
646                 
647         if (!alias)
648                 return 1;
649         
650         if (!dm_type(alias, DEFAULT_TARGET)) {
651                 condlog(4, "%s: not a multipath map", alias);
652                 FREE(alias);
653                 return 0;
654         }
655
656         mpp = find_mp_by_alias(vecs->mpvec, alias);
657
658         if (mpp) {
659                 /*
660                  * this should not happen,
661                  * we missed a remove map event (not sent ?)
662                  */
663                 condlog(2, "%s: already registered", alias);
664                 remove_map(mpp, vecs);
665         }
666
667         /*
668          * now we can allocate
669          */
670         mpp = alloc_multipath();
671
672         if (!mpp)
673                 return 1;
674
675         mpp->alias = alias;
676
677         if (setup_multipath(vecs, mpp))
678                 return 1; /* mpp freed in setup_multipath */
679
680         if (!vector_alloc_slot(vecs->mpvec))
681                 goto out;
682
683         vector_set_slot(vecs->mpvec, mpp);
684         adopt_paths(vecs, mpp);
685
686         if (start_waiter_thread(mpp, vecs))
687                 goto out;
688
689         return 0;
690 out:
691         condlog(2, "%s: add devmap failed", mpp->alias);
692         remove_map(mpp, vecs);
693         return 1;
694 }
695
696 int
697 uev_remove_map (char * devname, struct vectors * vecs)
698 {
699         int minor;
700         struct multipath * mpp;
701
702         if (sscanf(devname, "dm-%d", &minor) == 1)
703                 mpp = find_mp_by_minor(vecs->mpvec, minor);
704         else
705                 mpp = find_mp_by_alias(vecs->mpvec, devname);
706
707         if (!mpp) {
708                 condlog(3, "%s: devmap not registered, can't remove",
709                         devname);
710                 return 0;
711         }
712
713         condlog(2, "remove %s devmap", mpp->alias);
714         remove_map(mpp, vecs);
715
716         return 0;
717 }
718
719 int
720 uev_add_path (char * devname, struct vectors * vecs)
721 {
722         struct path * pp;
723
724         pp = find_path_by_dev(vecs->pathvec, devname);
725
726         if (pp) {
727                 condlog(3, "%s: already in pathvec");
728                 return 1;
729         }
730         pp = store_pathinfo(vecs->pathvec, conf->hwtable,
731                        devname, DI_SYSFS | DI_WWID);
732
733         if (!pp) {
734                 condlog(0, "%s: failed to store path info", devname);
735                 return 1;
736         }
737
738         condlog(2, "%s: path checker registered", devname);
739         pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
740
741         if (pp->mpp) {
742                 condlog(4, "%s: ownership set to %s",
743                                 pp->dev_t, pp->mpp->alias);
744         } else {
745                 condlog(4, "%s: orphaned", pp->dev_t);
746                 orphan_path(pp);
747         }
748
749         return 0;
750 }
751
752 int
753 uev_remove_path (char * devname, struct vectors * vecs)
754 {
755         int i;
756         struct path * pp;
757
758         pp = find_path_by_dev(vecs->pathvec, devname);
759
760         if (!pp) {
761                 condlog(3, "%s: not in pathvec");
762                 return 1;
763         }
764
765         if (pp->mpp && pp->state == PATH_UP)
766                 update_queue_mode_del_path(pp->mpp);
767
768         condlog(2, "remove %s path checker", devname);
769         i = find_slot(vecs->pathvec, (void *)pp);
770         vector_del_slot(vecs->pathvec, i);
771         free_path(pp);
772
773         return 0;
774 }
775
776 int
777 show_paths (char ** r, int * len, struct vectors * vecs)
778 {
779         int i;
780         struct path * pp;
781         char * c;
782         char * reply;
783         struct path_layout pl;
784         int maxlen = INITIAL_REPLY_LEN;
785         int again = 1;
786
787         get_path_layout(&pl, vecs->pathvec);
788         reply = MALLOC(maxlen);
789
790         while (again) {
791                 if (!reply)
792                         return 1;
793
794                 c = reply;
795
796                 if (VECTOR_SIZE(vecs->pathvec) > 0)
797                         c += snprint_path_header(c, reply + maxlen - c,
798                                                  PRINT_PATH_CHECKER, &pl);
799
800                 vector_foreach_slot(vecs->pathvec, pp, i)
801                         c += snprint_path(c, reply + maxlen - c,
802                                           PRINT_PATH_CHECKER, pp, &pl);
803
804                 again = ((c - reply) == (maxlen - 1));
805
806                 if (again)
807                         reply = REALLOC(reply, maxlen *= 2);
808
809         }
810         *r = reply;
811         *len = (int)(c - reply + 1);
812         return 0;
813 }
814
815 int
816 show_maps (char ** r, int *len, struct vectors * vecs)
817 {
818         int i;
819         struct multipath * mpp;
820         char * c;
821         char * reply;
822         struct map_layout ml;
823         int maxlen = INITIAL_REPLY_LEN;
824         int again = 1;
825
826         get_map_layout(&ml, vecs->mpvec);
827         reply = MALLOC(maxlen);
828
829         while (again) {
830                 if (!reply)
831                         return 1;
832
833                 c = reply;
834                 if (VECTOR_SIZE(vecs->mpvec) > 0)
835                         c += snprint_map_header(c, reply + maxlen - c,
836                                                 PRINT_MAP_FAILBACK, &ml);
837
838                 vector_foreach_slot(vecs->mpvec, mpp, i)
839                         c += snprint_map(c, reply + maxlen - c,
840                                          PRINT_MAP_FAILBACK, mpp, &ml);
841
842                 again = ((c - reply) == (maxlen - 1));
843
844                 if (again)
845                         reply = REALLOC(reply, maxlen *= 2);
846         }
847         *r = reply;
848         *len = (int)(c - reply + 1);
849         return 0;
850 }
851
852 int
853 dump_pathvec (char ** r, int * len, struct vectors * vecs)
854 {
855         int i;
856         struct path * pp;
857         char * reply;
858         char * p;
859
860         *len = VECTOR_SIZE(vecs->pathvec) * sizeof(struct path);
861         reply = (char *)MALLOC(*len);
862         *r = reply;
863
864         if (!reply)
865                 return 1;
866
867         p = reply;
868
869         vector_foreach_slot (vecs->pathvec, pp, i) {
870                 memcpy((void *)p, pp, sizeof(struct path));
871                 p += sizeof(struct path);
872         }
873
874         /* return negative to hint caller not to add "ok" to the dump */
875         return -1;
876 }
877
878 static int
879 map_discovery (struct vectors * vecs)
880 {
881         int i;
882         struct multipath * mpp;
883
884         if (dm_get_maps(vecs->mpvec, "multipath"))
885                 return 1;
886
887         vector_foreach_slot (vecs->mpvec, mpp, i) {
888                 if (setup_multipath(vecs, mpp))
889                         return 1;
890                 start_waiter_thread(mpp, vecs);
891         }
892
893         return 0;
894 }
895
896 int
897 reconfigure (struct vectors * vecs)
898 {
899         struct config * old = conf;
900         struct multipath * mpp;
901         struct path * pp;
902         int i;
903
904         conf = NULL;
905
906         if (load_config(DEFAULT_CONFIGFILE)) {
907                 conf = old;
908                 condlog(2, "reconfigure failed, continue with old config");
909                 return 1;
910         }
911         conf->verbosity = old->verbosity;
912         free_config(old);
913
914         vector_foreach_slot (vecs->mpvec, mpp, i) {
915                 mpp->mpe = find_mpe(mpp->wwid);
916                 mpp->hwe = extract_hwe_from_path(mpp);
917                 adopt_paths(vecs, mpp);
918                 set_no_path_retry(mpp);
919         }
920         vector_foreach_slot (vecs->pathvec, pp, i) {
921                 select_checkfn(pp);
922                 select_getuid(pp);
923                 select_getprio(pp);
924         }
925         condlog(2, "reconfigured");
926         return 0;
927 }
928
929 int
930 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
931 {
932         struct vectors * vecs;
933         int r;
934         
935         *reply = NULL;
936         *len = 0;
937         vecs = (struct vectors *)trigger_data;
938
939         pthread_cleanup_push(cleanup_lock, vecs->lock);
940         lock(vecs->lock);
941
942         r = parse_cmd(str, reply, len, vecs);
943
944         if (r > 0) {
945                 *reply = STRDUP("fail\n");
946                 *len = strlen(*reply) + 1;
947                 r = 1;
948         }
949         else if (!r && *len == 0) {
950                 *reply = STRDUP("ok\n");
951                 *len = strlen(*reply) + 1;
952                 r = 0;
953         }
954         /* else if (r < 0) leave *reply alone */
955
956         lock_cleanup_pop(vecs->lock);
957         return r;
958 }
959
/*
 * Filter uevents on their devpath: keep whole block devices only.
 *
 * Returns 1 (discard) when devpath does not have the "/block/<name>"
 * form, or when it has a second component ("/block/<name>/<part>",
 * i.e. a partition). Returns 0 (keep) otherwise.
 */
static int
uev_discard(char * devpath)
{
	/*
	 * a "%10s" / "%10[" conversion stores up to 10 characters plus
	 * the terminating NUL: the buffers need 11 bytes
	 */
	char a[11], b[11];

	/*
	 * keep only block devices, discard partitions
	 */
	if (sscanf(devpath, "/block/%10s", a) != 1 ||
	    sscanf(devpath, "/block/%10[^/]/%10s", a, b) == 2) {
		condlog(4, "discard event on %s", devpath);
		return 1;
	}
	return 0;
}
975
976 int 
977 uev_trigger (struct uevent * uev, void * trigger_data)
978 {
979         int r = 0;
980         char devname[32];
981         struct vectors * vecs;
982
983         vecs = (struct vectors *)trigger_data;
984
985         if (uev_discard(uev->devpath))
986                 return 1;
987
988         basename(uev->devpath, devname);
989         lock(vecs->lock);
990
991         /*
992          * device map add/remove event
993          */
994         if (!strncmp(devname, "dm-", 3)) {
995                 if (!strncmp(uev->action, "add", 3)) {
996                         r = uev_add_map(devname, vecs);
997                         goto out;
998                 }
999 #if 0
1000                 if (!strncmp(uev->action, "remove", 6)) {
1001                         r = uev_remove_map(devname, vecs);
1002                         goto out;
1003                 }
1004 #endif
1005                 goto out;
1006         }
1007         
1008         /*
1009          * path add/remove event
1010          */
1011         if (blacklist(conf->blist, devname))
1012                 goto out;
1013
1014         if (!strncmp(uev->action, "add", 3)) {
1015                 r = uev_add_path(devname, vecs);
1016                 goto out;
1017         }
1018         if (!strncmp(uev->action, "remove", 6)) {
1019                 r = uev_remove_path(devname, vecs);
1020                 goto out;
1021         }
1022
1023 out:
1024         unlock(vecs->lock);
1025         return r;
1026 }
1027
1028 static void *
1029 ueventloop (void * ap)
1030 {
1031         if (uevent_listen(&uev_trigger, ap))
1032                 fprintf(stderr, "error starting uevent listener");
1033                 
1034         return NULL;
1035 }
1036
1037 static void *
1038 uxlsnrloop (void * ap)
1039 {
1040         if (load_keys())
1041                 return NULL;
1042         
1043         if (alloc_handlers())
1044                 return NULL;
1045
1046         add_handler(LIST+PATHS, cli_list_paths);
1047         add_handler(LIST+MAPS, cli_list_maps);
1048         add_handler(ADD+PATH, cli_add_path);
1049         add_handler(DEL+PATH, cli_del_path);
1050         add_handler(ADD+MAP, cli_add_map);
1051         add_handler(DEL+MAP, cli_del_map);
1052         add_handler(SWITCH+MAP+GROUP, cli_switch_group);
1053         add_handler(DUMP+PATHVEC, cli_dump_pathvec);
1054         add_handler(RECONFIGURE, cli_reconfigure);
1055         add_handler(SUSPEND+MAP, cli_suspend);
1056         add_handler(RESUME+MAP, cli_resume);
1057         add_handler(REINSTATE+PATH, cli_reinstate);
1058         add_handler(FAIL+PATH, cli_fail);
1059
1060         uxsock_listen(&uxsock_trigger, ap);
1061
1062         return NULL;
1063 }
1064
1065 static int
1066 exit_daemon (int status)
1067 {
1068         if (status != 0)
1069                 fprintf(stderr, "bad exit status. see daemon.log\n");
1070
1071         condlog(3, "unlink pidfile");
1072         unlink(DEFAULT_PIDFILE);
1073
1074         lock(&exit_mutex);
1075         pthread_cond_signal(&exit_cond);
1076         unlock(&exit_mutex);
1077
1078         return status;
1079 }
1080
1081 static void
1082 fail_path (struct path * pp)
1083 {
1084         if (!pp->mpp)
1085                 return;
1086
1087         condlog(2, "checker failed path %s in map %s",
1088                  pp->dev_t, pp->mpp->alias);
1089
1090         dm_fail_path(pp->mpp->alias, pp->dev_t);
1091         update_queue_mode_del_path(pp->mpp);
1092 }
1093
1094 /*
1095  * caller must have locked the path list before calling that function
1096  */
1097 static void
1098 reinstate_path (struct path * pp)
1099 {
1100         if (!pp->mpp)
1101                 return;
1102
1103         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
1104                 condlog(0, "%s: reinstate failed", pp->dev_t);
1105         else {
1106                 condlog(2, "%s: reinstated", pp->dev_t);
1107                 update_queue_mode_add_path(pp->mpp);
1108         }
1109 }
1110
1111 static void
1112 enable_group(struct path * pp)
1113 {
1114         struct pathgroup * pgp;
1115
1116         /*
1117          * if path is added through uev_add_path, pgindex can be unset.
1118          * next update_strings() will set it, upon map reload event.
1119          *
1120          * we can safely return here, because upon map reload, all
1121          * PG will be enabled.
1122          */
1123         if (!pp->mpp->pg || !pp->pgindex)
1124                 return;
1125
1126         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1127         
1128         if (pgp->status == PGSTATE_DISABLED) {
1129                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1130                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1131         }
1132 }
1133
1134 static void
1135 mpvec_garbage_collector (struct vectors * vecs)
1136 {
1137         struct multipath * mpp;
1138         int i;
1139
1140         vector_foreach_slot (vecs->mpvec, mpp, i) {
1141                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1142                         condlog(2, "%s: remove dead map", mpp->alias);
1143                         remove_map(mpp, vecs);
1144                         i--;
1145                 }
1146         }
1147 }
1148
1149 static void
1150 defered_failback_tick (vector mpvec)
1151 {
1152         struct multipath * mpp;
1153         int i;
1154
1155         vector_foreach_slot (mpvec, mpp, i) {
1156                 /*
1157                  * defered failback getting sooner
1158                  */
1159                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1160                         mpp->failback_tick--;
1161
1162                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1163                                 switch_pathgroup(mpp);
1164                 }
1165         }
1166 }
1167
1168 static void
1169 retry_count_tick(vector mpvec)
1170 {
1171         struct multipath *mpp;
1172         int i;
1173
1174         vector_foreach_slot (mpvec, mpp, i) {
1175                 if (mpp->retry_tick) {
1176                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1177                         if(--mpp->retry_tick == 0) {
1178                                 dm_queue_if_no_path(mpp->alias, 0);
1179                                 condlog(2, "%s: Disable queueing", mpp->alias);
1180                         }
1181                 }
1182         }
1183 }
1184
1185 static void *
1186 checkerloop (void *ap)
1187 {
1188         struct vectors *vecs;
1189         struct path *pp;
1190         int i, count = 0;
1191         int newstate;
1192         char checker_msg[MAX_CHECKER_MSG_SIZE];
1193
1194         mlockall(MCL_CURRENT | MCL_FUTURE);
1195
1196         memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
1197         vecs = (struct vectors *)ap;
1198
1199         condlog(2, "path checkers start up");
1200
1201         /*
1202          * init the path check interval
1203          */
1204         vector_foreach_slot (vecs->pathvec, pp, i) {
1205                 pp->checkint = conf->checkint;
1206         }
1207
1208         while (1) {
1209                 pthread_cleanup_push(cleanup_lock, vecs->lock);
1210                 lock(vecs->lock);
1211                 condlog(4, "tick");
1212
1213                 vector_foreach_slot (vecs->pathvec, pp, i) {
1214                         if (!pp->mpp)
1215                                 continue;
1216
1217                         if (pp->tick && --pp->tick)
1218                                 continue; /* don't check this path yet */
1219
1220                         /*
1221                          * provision a next check soonest,
1222                          * in case we exit abnormaly from here
1223                          */
1224                         pp->tick = conf->checkint;
1225                         
1226                         if (!pp->checkfn) {
1227                                 pathinfo(pp, conf->hwtable, DI_SYSFS);
1228                                 select_checkfn(pp);
1229                         }
1230
1231                         if (!pp->checkfn) {
1232                                 condlog(0, "%s: checkfn is void", pp->dev);
1233                                 continue;
1234                         }
1235                         newstate = pp->checkfn(pp->fd, checker_msg,
1236                                                &pp->checker_context);
1237                         
1238                         if (newstate < 0) {
1239                                 condlog(2, "%s: unusable path", pp->dev);
1240                                 pathinfo(pp, conf->hwtable, 0);
1241                                 continue;
1242                         }
1243
1244                         if (newstate != pp->state) {
1245                                 pp->state = newstate;
1246                                 LOG_MSG(1, checker_msg);
1247
1248                                 /*
1249                                  * upon state change, reset the checkint
1250                                  * to the shortest delay
1251                                  */
1252                                 pp->checkint = conf->checkint;
1253
1254                                 if (newstate == PATH_DOWN ||
1255                                     newstate == PATH_SHAKY ||
1256                                     update_multipath_strings(pp->mpp,
1257                                                              vecs->pathvec)) {
1258                                         /*
1259                                          * proactively fail path in the DM
1260                                          */
1261                                         fail_path(pp);
1262
1263                                         /*
1264                                          * cancel scheduled failback
1265                                          */
1266                                         pp->mpp->failback_tick = 0;
1267
1268                                         continue;
1269                                 }
1270
1271                                 /*
1272                                  * reinstate this path
1273                                  */
1274                                 reinstate_path(pp);
1275
1276                                 /*
1277                                  * schedule [defered] failback
1278                                  */
1279                                 if (pp->mpp->pgfailback > 0)
1280                                         pp->mpp->failback_tick =
1281                                                 pp->mpp->pgfailback + 1;
1282                                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
1283                                     need_switch_pathgroup(pp->mpp, 1))
1284                                         switch_pathgroup(pp->mpp);
1285
1286                                 /*
1287                                  * if at least one path is up in a group, and
1288                                  * the group is disabled, re-enable it
1289                                  */
1290                                 if (newstate == PATH_UP)
1291                                         enable_group(pp);
1292                         }
1293                         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1294                                 LOG_MSG(4, checker_msg);
1295                                 /*
1296                                  * double the next check delay.
1297                                  * max at conf->max_checkint
1298                                  */
1299                                 if (pp->checkint < (conf->max_checkint / 2))
1300                                         pp->checkint = 2 * pp->checkint;
1301                                 else
1302                                         pp->checkint = conf->max_checkint;
1303
1304                                 pp->tick = pp->checkint;
1305                                 condlog(4, "%s: delay next check %is",
1306                                                 pp->dev_t, pp->tick);
1307
1308                         }
1309                         pp->state = newstate;
1310
1311                         /*
1312                          * path prio refreshing
1313                          */
1314                         condlog(4, "path prio refresh");
1315                         pathinfo(pp, conf->hwtable, DI_PRIO);
1316
1317                         if (need_switch_pathgroup(pp->mpp, 0)) {
1318                                 if (pp->mpp->pgfailback > 0)
1319                                         pp->mpp->failback_tick =
1320                                                 pp->mpp->pgfailback + 1;
1321                                 else if (pp->mpp->pgfailback ==
1322                                                 -FAILBACK_IMMEDIATE)
1323                                         switch_pathgroup(pp->mpp);
1324                         }
1325                 }
1326                 defered_failback_tick(vecs->mpvec);
1327                 retry_count_tick(vecs->mpvec);
1328
1329                 if (count)
1330                         count--;
1331                 else {
1332                         condlog(4, "map garbage collection");
1333                         mpvec_garbage_collector(vecs);
1334                         count = MAPGCINT;
1335                 }
1336                 
1337                 lock_cleanup_pop(vecs->lock);
1338                 sleep(1);
1339         }
1340         return NULL;
1341 }
1342
1343 static struct vectors *
1344 init_paths (void)
1345 {
1346         struct vectors * vecs;
1347
1348         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1349
1350         if (!vecs)
1351                 return NULL;
1352
1353         vecs->lock = 
1354                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1355
1356         if (!vecs->lock)
1357                 goto out;
1358
1359         vecs->pathvec = vector_alloc();
1360
1361         if (!vecs->pathvec)
1362                 goto out1;
1363                 
1364         vecs->mpvec = vector_alloc();
1365
1366         if (!vecs->mpvec)
1367                 goto out2;
1368         
1369         pthread_mutex_init(vecs->lock, NULL);
1370
1371         return vecs;
1372
1373 out2:
1374         vector_free(vecs->pathvec);
1375 out1:
1376         FREE(vecs->lock);
1377 out:
1378         FREE(vecs);
1379         condlog(0, "failed to init paths");
1380         return NULL;
1381 }
1382
/*
 * Install "func" as the handler of signal "signo" through sigaction(),
 * with an empty signal mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when
 * sigaction() fails (both converted to void *).
 */
static void *
signal_set(int signo, void (*func) (int))
{
        struct sigaction previous;
        struct sigaction wanted;

        sigemptyset(&wanted.sa_mask);
        wanted.sa_flags = 0;
        wanted.sa_handler = func;

        if (sigaction(signo, &wanted, &previous) < 0)
                return (void *)SIG_ERR;

        return (void *)previous.sa_handler;
}
1401
/*
 * SIGHUP handler: logs the reception of the signal and, in _DEBUG_
 * builds only, calls dbg_free_final(NULL).
 */
static void
sighup (int sig)
{
        (void) sig;     /* signal number is not needed */

        condlog(3, "SIGHUP received");

#ifdef _DEBUG_
        dbg_free_final(NULL);
#endif
}
1411
/*
 * Termination signal handler: asks the daemon to exit with a zero
 * status. The value returned by exit_daemon() is not used.
 */
static void
sigend (int sig)
{
        (void) sig;     /* signal number is not needed */

        (void) exit_daemon(0);
}
1417
1418 static void
1419 signal_init(void)
1420 {
1421         signal_set(SIGHUP, sighup);
1422         signal_set(SIGINT, sigend);
1423         signal_set(SIGTERM, sigend);
1424         signal_set(SIGKILL, sigend);
1425 }
1426
/*
 * Switch the calling process to the SCHED_RR realtime scheduling
 * policy at priority 99.
 *
 * A failure is not fatal: it is only logged, together with the reason
 * reported through errno.
 */
static void
setscheduler (void)
{
        /* C99 designated initializer instead of the obsolete GNU "field:" form */
        static struct sched_param sched_param = {
                .sched_priority = 99
        };

        /* NOTE(review): the other condlog() callers pass 0..4, confirm LOG_WARNING is the intended level */
        if (sched_setscheduler(0, SCHED_RR, &sched_param) == -1)
                condlog(LOG_WARNING,
                        "Could not set SCHED_RR at priority 99: %s",
                        strerror(errno));
}
1441
/*
 * Write "val" to /proc/self/oom_adj.
 *
 * Best effort: nothing happens when the file can not be opened, and
 * write errors are not reported.
 */
static void
set_oom_adj (int val)
{
        FILE *oom_file = fopen("/proc/self/oom_adj", "w");

        if (oom_file != NULL) {
                fprintf(oom_file, "%i", val);
                fclose(oom_file);
        }
}
1455         
1456 static int
1457 child (void * param)
1458 {
1459         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1460         pthread_attr_t attr;
1461         struct vectors * vecs;
1462
1463         mlockall(MCL_CURRENT | MCL_FUTURE);
1464
1465         if (logsink)
1466                 log_thread_start();
1467
1468         condlog(2, "--------start up--------");
1469         condlog(2, "read " DEFAULT_CONFIGFILE);
1470
1471         if (load_config(DEFAULT_CONFIGFILE))
1472                 exit(1);
1473
1474         setlogmask(LOG_UPTO(conf->verbosity + 3));
1475
1476         /*
1477          * fill the voids left in the config file
1478          */
1479         if (!conf->checkint) {
1480                 conf->checkint = CHECKINT;
1481                 conf->max_checkint = MAX_CHECKINT;
1482         }
1483
1484         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1485                 if (logsink)
1486                         log_thread_stop();
1487
1488                 exit(1);
1489         }
1490         signal_init();
1491         setscheduler();
1492         set_oom_adj(-17);
1493         vecs = init_paths();
1494
1495         if (!vecs)
1496                 exit(1);
1497
1498         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1499                 condlog(0, "can not find sysfs mount point");
1500                 exit(1);
1501         }
1502
1503         /*
1504          * fetch paths and multipaths lists
1505          * no paths and/or no multipaths are valid scenarii
1506          * vectors maintenance will be driven by events
1507          */
1508         path_discovery(vecs->pathvec, conf, DI_SYSFS | DI_WWID | DI_CHECKER);
1509         map_discovery(vecs);
1510
1511         /*
1512          * start threads
1513          */
1514         pthread_attr_init(&attr);
1515         pthread_attr_setstacksize(&attr, 64 * 1024);
1516         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
1517         
1518         pthread_create(&check_thr, &attr, checkerloop, vecs);
1519         pthread_create(&uevent_thr, &attr, ueventloop, vecs);
1520         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, vecs);
1521
1522         pthread_cond_wait(&exit_cond, &exit_mutex);
1523
1524         /*
1525          * exit path
1526          */
1527         lock(vecs->lock);
1528         remove_maps(vecs);
1529         free_pathvec(vecs->pathvec, FREE_PATHS);
1530
1531         pthread_cancel(check_thr);
1532         pthread_cancel(uevent_thr);
1533         pthread_cancel(uxlsnr_thr);
1534
1535         free_keys(keys);
1536         keys = NULL;
1537         free_handlers(handlers);
1538         handlers = NULL;
1539         free_polls();
1540
1541         unlock(vecs->lock);
1542         pthread_mutex_destroy(vecs->lock);
1543         FREE(vecs->lock);
1544         vecs->lock = NULL;
1545         FREE(vecs);
1546         vecs = NULL;
1547         free_config(conf);
1548         conf = NULL;
1549
1550         condlog(2, "--------shut down-------");
1551         
1552         if (logsink)
1553                 log_thread_stop();
1554
1555 #ifdef _DEBUG_
1556         dbg_free_final(NULL);
1557 #endif
1558
1559         exit(0);
1560 }
1561
/*
 * Detach the process from its controlling terminal.
 *
 * Forks twice with a setsid() in between, then redirects stdin to
 * /dev/null and stdout/stderr to /dev/console, moves to "/" and
 * clears the umask.
 *
 * Returns:
 *   -1  in the original process when the first fork fails,
 *   >0  in the original process on success (pid of the first child),
 *    0  in the process that carries on as the daemon.
 *
 * The intermediate child and any process that fails to set up its
 * standard descriptors terminate through _exit().
 */
static int
daemonize(void)
{
        int pid;
        int in_fd, out_fd;

        if( (pid = fork()) < 0){
                fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
                return -1;
        }
        else if (pid != 0)
                return pid;

        setsid();

        /* a failed second fork is only reported, this process carries on */
        if ( (pid = fork()) < 0)
                fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
        else if (pid != 0)
                _exit(0);

        in_fd = open("/dev/null", O_RDONLY);
        if (in_fd < 0){
                fprintf(stderr, "cannot open /dev/null for input : %s\n",
                        strerror(errno));
                _exit(0);
        }
        out_fd = open("/dev/console", O_WRONLY);
        if (out_fd < 0){
                fprintf(stderr, "cannot open /dev/console for output : %s\n",
                        strerror(errno));
                _exit(0);
        }

        /*
         * dup2() replaces the target descriptor atomically and copes
         * with in_fd/out_fd already being 0, 1 or 2, which happens when
         * the daemon is started with some standard descriptors closed.
         */
        if (dup2(in_fd, STDIN_FILENO) < 0 ||
            dup2(out_fd, STDOUT_FILENO) < 0 ||
            dup2(out_fd, STDERR_FILENO) < 0)
                _exit(1);

        /* never close a descriptor that now serves as stdin/out/err */
        if (in_fd > STDERR_FILENO)
                close(in_fd);
        if (out_fd > STDERR_FILENO)
                close(out_fd);

        chdir("/");
        umask(0);
        return 0;
}
1608
1609 int
1610 main (int argc, char *argv[])
1611 {
1612         extern char *optarg;
1613         extern int optind;
1614         int arg;
1615         int err;
1616         
1617         logsink = 1;
1618
1619         if (getuid() != 0) {
1620                 fprintf(stderr, "need to be root\n");
1621                 exit(1);
1622         }
1623
1624         /* make sure we don't lock any path */
1625         chdir("/");
1626         umask(umask(077) | 022);
1627
1628         conf = alloc_config();
1629
1630         if (!conf)
1631                 exit(1);
1632
1633         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1634         switch(arg) {
1635                 case 'd':
1636                         logsink = 0;
1637                         //debug=1; /* ### comment me out ### */
1638                         break;
1639                 case 'v':
1640                         if (sizeof(optarg) > sizeof(char *) ||
1641                             !isdigit(optarg[0]))
1642                                 exit(1);
1643
1644                         conf->verbosity = atoi(optarg);
1645                         break;
1646                 case 'k':
1647                         uxclnt(optarg);
1648                         exit(0);
1649                 default:
1650                         ;
1651                 }
1652         }
1653
1654         if (!logsink)
1655                 err = 0;
1656         else
1657                 err = daemonize();
1658         
1659         if (err < 0)
1660                 /* error */
1661                 exit(1);
1662         else if (err > 0)
1663                 /* parent dies */
1664                 exit(0);
1665         else
1666                 /* child lives */
1667                 return (child(NULL));
1668 }