[multipathd] remove pthread_cancel logic to stop a waiter thread
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 #include <unistd.h>
2 #include <sys/stat.h>
3 #include <libdevmapper.h>
4 #include <wait.h>
5 #include <sys/mman.h>
6
7 /*
8  * libsysfs
9  */
10 #include <sysfs/libsysfs.h>
11 #include <sysfs/dlist.h>
12
13 /*
14  * libcheckers
15  */
16 #include <checkers.h>
17 #include <path_state.h>
18
19 /*
20  * libmultipath
21  */
22 #include <parser.h>
23 #include <vector.h>
24 #include <memory.h>
25 #include <config.h>
26 #include <callout.h>
27 #include <util.h>
28 #include <blacklist.h>
29 #include <hwtable.h>
30 #include <defaults.h>
31 #include <structs.h>
32 #include <dmparser.h>
33 #include <devmapper.h>
34 #include <dict.h>
35 #include <discovery.h>
36 #include <debug.h>
37 #include <propsel.h>
38 #include <uevent.h>
39 #include <switchgroup.h>
40 #include <path_state.h>
41 #include <print.h>
42
43 #include "main.h"
44 #include "pidfile.h"
45 #include "uxlsnr.h"
46 #include "uxclnt.h"
47 #include "cli.h"
48 #include "cli_handlers.h"
49
#define FILE_NAME_SIZE 256
#define CMDSIZE 160

/*
 * Log the checker message `b` at verbosity `a`, prefixed with the dev_t of
 * the current path, then wipe the message buffer.
 *
 * Relies on a `struct path * pp` being in scope at the expansion site.
 * NOTE(review): expands to a bare `if` block, so it must not be used as the
 * body of an unbraced if/else.
 */
#define LOG_MSG(a,b) \
        if (strlen(b)) { \
                condlog(a, "%s: %s", pp->dev_t, b); \
                memset(b, 0, MAX_CHECKER_MSG_SIZE); \
        }

/*
 * Mutex helpers. Building with LCKDBG traces every operation on stderr.
 *
 * lock()/unlock() are single expressions in both builds, so they keep the
 * pthread return value and stay safe inside unbraced conditionals.
 * lock_cleanup_pop() wraps pthread_cleanup_pop(1) and therefore has to be
 * lexically paired with a pthread_cleanup_push() in the same scope.
 */
#ifdef LCKDBG
#define lock(a) \
        (fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)), \
         pthread_mutex_lock(a))
#define unlock(a) \
        (fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)), \
         pthread_mutex_unlock(a))
#define lock_cleanup_pop(a) \
        fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, (void *)(a)); \
        pthread_cleanup_pop(1);
#else
#define lock(a) pthread_mutex_lock(a)
#define unlock(a) pthread_mutex_unlock(a)
#define lock_cleanup_pop(a) pthread_cleanup_pop(1);
#endif
74
75 pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
76 pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
77
78 /*
79  * structs
80  */
81 struct event_thread {
82         struct dm_task *dmt;
83         pthread_t thread;
84         int event_nr;
85         char mapname[WWID_SIZE];
86         struct vectors *vecs;
87 };
88
89 static struct event_thread *
90 alloc_waiter (void)
91 {
92
93         struct event_thread * wp;
94
95         wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
96
97         return wp;
98 }
99
100 static void
101 free_waiter (void * data)
102 {
103         struct event_thread * wp = (struct event_thread *)data;
104
105         if (wp->dmt)
106                 dm_task_destroy(wp->dmt);
107         FREE(wp);
108 }
109
110 static void
111 stop_waiter_thread (struct multipath * mpp, struct vectors * vecs)
112 {
113         struct event_thread * wp = (struct event_thread *)mpp->waiter;
114         pthread_t thread = wp->thread;
115
116         if (!wp)
117                 return;
118
119         condlog(2, "%s: stop event checker thread", wp->mapname);
120         pthread_kill(thread, SIGHUP);
121 }
122
/*
 * pthread cleanup handler: unlock the mutex passed as opaque argument.
 */
static void
cleanup_lock (void * data)
{
        pthread_mutex_t * mutex = data;

        pthread_mutex_unlock(mutex);
}
128
129 static void
130 adopt_paths (struct vectors * vecs, struct multipath * mpp)
131 {
132         int i;
133         struct path * pp;
134
135         if (!mpp)
136                 return;
137
138         vector_foreach_slot (vecs->pathvec, pp, i) {
139                 if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
140                         condlog(4, "%s ownership set", pp->dev_t);
141                         pp->mpp = mpp;
142                 }
143         }
144 }
145
146 static void
147 orphan_path (struct path * pp)
148 {
149         pp->mpp = NULL;
150         pp->checkfn = NULL;
151         pp->dmstate = PSTATE_UNDEF;
152         pp->checker_context = NULL;
153         pp->getuid = NULL;
154         pp->getprio = NULL;
155
156         if (pp->fd >= 0)
157                 close(pp->fd);
158
159         pp->fd = -1;
160 }
161
162 static void
163 orphan_paths (struct vectors * vecs, struct multipath * mpp)
164 {
165         int i;
166         struct path * pp;
167
168         vector_foreach_slot (vecs->pathvec, pp, i) {
169                 if (pp->mpp == mpp) {
170                         condlog(4, "%s is orphaned", pp->dev_t);
171                         orphan_path(pp);
172                 }
173         }
174 }
175
176 static int
177 update_multipath_table (struct multipath *mpp, vector pathvec)
178 {
179         if (!mpp)
180                 return 1;
181
182         if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
183                 return 1;
184
185         if (disassemble_map(pathvec, mpp->params, mpp))
186                 return 1;
187
188         return 0;
189 }
190
191 static int
192 update_multipath_status (struct multipath *mpp)
193 {
194         if (!mpp)
195                 return 1;
196
197         if(dm_get_status(mpp->alias, mpp->status))
198                 return 1;
199
200         if (disassemble_status(mpp->status, mpp))
201                 return 1;
202
203         return 0;
204 }
205
206 static int
207 update_multipath_strings (struct multipath *mpp, vector pathvec)
208 {
209         if (mpp->selector) {
210                 FREE(mpp->selector);
211                 mpp->selector = NULL;
212         }
213
214         if (mpp->features) {
215                 FREE(mpp->features);
216                 mpp->features = NULL;
217         }
218
219         if (mpp->hwhandler) {
220                 FREE(mpp->hwhandler);
221                 mpp->hwhandler = NULL;
222         }
223
224         free_pgvec(mpp->pg, KEEP_PATHS);
225         mpp->pg = NULL;
226
227         if (update_multipath_table(mpp, pathvec))
228                 return 1;
229
230         if (update_multipath_status(mpp))
231                 return 1;
232
233         return 0;
234 }
235
236 static void
237 set_multipath_wwid (struct multipath * mpp)
238 {
239         if (mpp->wwid)
240                 return;
241
242         dm_get_uuid(mpp->alias, mpp->wwid);
243 }
244
245 static int
246 pathcount (struct multipath *mpp, int state)
247 {
248         struct pathgroup *pgp;
249         struct path *pp;
250         int i, j;
251         int count = 0;
252
253         vector_foreach_slot (mpp->pg, pgp, i)
254                 vector_foreach_slot (pgp->paths, pp, j)
255                         if (pp->state == state)
256                                 count++;
257         return count;
258 }
259
260 /*
261  * mpp->no_path_retry:
262  *   -2 (QUEUE) : queue_if_no_path enabled, never turned off
263  *   -1 (FAIL)  : fail_if_no_path
264  *    0 (UNDEF) : nothing
265  *   >0         : queue_if_no_path enabled, turned off after polling n times
266  */
267 static void
268 update_queue_mode_del_path(struct multipath *mpp)
269 {
270         if (--mpp->nr_active == 0 && mpp->no_path_retry > 0) {
271                 /*
272                  * Enter retry mode.
273                  * meaning of +1: retry_tick may be decremented in
274                  *                checkerloop before starting retry.
275                  */
276                 mpp->retry_tick = mpp->no_path_retry * conf->checkint + 1;
277                 condlog(1, "%s: Entering recovery mode: max_retries=%d",
278                         mpp->alias, mpp->no_path_retry);
279         }
280         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
281 }
282
283 static void
284 update_queue_mode_add_path(struct multipath *mpp)
285 {
286         if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
287                 /* come back to normal mode from retry mode */
288                 mpp->retry_tick = 0;
289                 dm_queue_if_no_path(mpp->alias, 1);
290                 condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
291                 condlog(1, "%s: Recovered to normal mode", mpp->alias);
292         }
293         condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
294 }
295
296 static void
297 set_no_path_retry(struct multipath *mpp)
298 {
299         mpp->retry_tick = 0;
300         mpp->nr_active = pathcount(mpp, PATH_UP);
301         select_no_path_retry(mpp);
302
303         switch (mpp->no_path_retry) {
304         case NO_PATH_RETRY_UNDEF:
305                 break;
306         case NO_PATH_RETRY_FAIL:
307                 dm_queue_if_no_path(mpp->alias, 0);
308                 break;
309         case NO_PATH_RETRY_QUEUE:
310                 dm_queue_if_no_path(mpp->alias, 1);
311                 break;
312         default:
313                 dm_queue_if_no_path(mpp->alias, 1);
314                 if (mpp->nr_active == 0) {
315                         /* Enter retry mode */
316                         mpp->retry_tick = mpp->no_path_retry * conf->checkint;
317                         condlog(1, "%s: Entering recovery mode: max_retries=%d",
318                                 mpp->alias, mpp->no_path_retry);
319                 }
320                 break;
321         }
322 }
323
324 static struct hwentry *
325 extract_hwe_from_path(struct multipath * mpp)
326 {
327         struct path * pp;
328         struct pathgroup * pgp;
329
330         pgp = VECTOR_SLOT(mpp->pg, 0);
331         pp = VECTOR_SLOT(pgp->paths, 0);
332
333         return pp->hwe;
334 }
335
336 static void
337 remove_map (struct multipath * mpp, struct vectors * vecs)
338 {
339         int i;
340
341         stop_waiter_thread(mpp, vecs);
342
343         /*
344          * clear references to this map
345          */
346         orphan_paths(vecs, mpp);
347
348         /*
349          * purge the multipath vector
350          */
351         i = find_slot(vecs->mpvec, (void *)mpp);
352         vector_del_slot(vecs->mpvec, i);
353
354         /*
355          * final free
356          */
357         free_multipath(mpp, KEEP_PATHS);
358         mpp = NULL;
359 }
360
361 static void
362 remove_maps (struct vectors * vecs)
363 {
364         int i;
365         struct multipath * mpp;
366
367         vector_foreach_slot (vecs->mpvec, mpp, i) {
368                 remove_map(mpp, vecs);
369                 i--;
370         }
371
372         vector_free(vecs->mpvec);
373         vecs->mpvec = NULL;
374 }
375
376 static int
377 setup_multipath (struct vectors * vecs, struct multipath * mpp)
378 {
379         set_multipath_wwid(mpp);
380         mpp->mpe = find_mpe(mpp->wwid);
381         condlog(4, "discovered map %s", mpp->alias);
382
383         if (update_multipath_strings(mpp, vecs->pathvec))
384                 goto out;
385
386         adopt_paths(vecs, mpp);
387         select_pgfailback(mpp);
388         mpp->hwe = extract_hwe_from_path(mpp);
389         set_no_path_retry(mpp);
390
391         return 0;
392 out:
393         remove_map(mpp, vecs);
394         condlog(0, "failed to setup multipath");
395         return 1;
396 }
397
398 static int
399 need_switch_pathgroup (struct multipath * mpp, int refresh)
400 {
401         struct pathgroup * pgp;
402         struct path * pp;
403         int i, j;
404
405         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
406                 return 0;
407
408         /*
409          * Refresh path priority values
410          */
411         if (refresh)
412                 vector_foreach_slot (mpp->pg, pgp, i)
413                         vector_foreach_slot (pgp->paths, pp, j)
414                                 pathinfo(pp, conf->hwtable, DI_PRIO);
415
416         select_path_group(mpp); /* sets mpp->nextpg */
417         pgp = VECTOR_SLOT(mpp->pg, mpp->nextpg - 1);
418
419         if (pgp && pgp->status != PGSTATE_ACTIVE)
420                 return 1;
421
422         return 0;
423 }
424
425 static void
426 switch_pathgroup (struct multipath * mpp)
427 {
428         struct pathgroup * pgp;
429         
430         pgp = VECTOR_SLOT(mpp->pg, mpp->nextpg - 1);
431         
432         if (pgp && pgp->status != PGSTATE_ACTIVE) {
433                 dm_switchgroup(mpp->alias, mpp->nextpg);
434                 condlog(2, "%s: switch to path group #%i",
435                          mpp->alias, mpp->nextpg);
436         }
437 }
438
439 static int
440 update_multipath (struct vectors *vecs, char *mapname)
441 {
442         struct multipath *mpp;
443         struct pathgroup  *pgp;
444         struct path *pp;
445         int i, j;
446         int r = 1;
447
448         mpp = find_mp(vecs->mpvec, mapname);
449
450         if (!mpp)
451                 goto out;
452
453         free_pgvec(mpp->pg, KEEP_PATHS);
454         mpp->pg = NULL;
455
456         if (setup_multipath(vecs, mpp))
457                 goto out; /* mpp freed in setup_multipath */
458
459         /*
460          * compare checkers states with DM states
461          */
462         vector_foreach_slot (mpp->pg, pgp, i) {
463                 vector_foreach_slot (pgp->paths, pp, j) {
464                         if (pp->dmstate != PSTATE_FAILED)
465                                 continue;
466
467                         if (pp->state != PATH_DOWN) {
468                                 condlog(2, "%s: mark as failed", pp->dev_t);
469                                 pp->state = PATH_DOWN;
470                                 update_queue_mode_del_path(mpp);
471
472                                 /*
473                                  * if opportune,
474                                  * schedule the next check earlier
475                                  */
476                                 if (pp->tick > conf->checkint)
477                                         pp->tick = conf->checkint;
478                         }
479                 }
480         }
481         r = 0;
482 out:
483         if (r)
484                 condlog(0, "failed to update multipath");
485
486         return r;
487 }
488
/*
 * Unblock SIGHUP for the calling thread.
 * Returns the signal mask that was in effect before the call.
 */
static sigset_t unblock_sighup(void)
{
        sigset_t hup_only;
        sigset_t previous;

        sigemptyset(&hup_only);
        sigaddset(&hup_only, SIGHUP);
        pthread_sigmask(SIG_UNBLOCK, &hup_only, &previous);

        return previous;
}
498
499 /*
500  * returns the reschedule delay
501  * negative means *stop*
502  */
503 static int
504 waiteventloop (struct event_thread * waiter)
505 {
506         sigset_t set;
507         int event_nr;
508         int r;
509
510         if (!waiter->event_nr)
511                 waiter->event_nr = dm_geteventnr(waiter->mapname);
512
513         if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
514                 return 1;
515
516         if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
517                 dm_task_destroy(waiter->dmt);
518                 return 1;
519         }
520
521         if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
522                                                       waiter->event_nr)) {
523                 dm_task_destroy(waiter->dmt);
524                 return 1;
525         }
526
527         dm_task_no_open_count(waiter->dmt);
528         
529         /* accept wait interruption */
530         set = unblock_sighup();
531
532         /* interruption spits messages */
533         dm_shut_log();
534
535         /* wait */
536         r = dm_task_run(waiter->dmt);
537
538         /* wait is over : event or interrupt */
539         pthread_sigmask(SIG_SETMASK, &set, NULL);
540         //dm_restore_log();
541
542         if (!r) /* wait interrupted by signal */
543                 return -1;
544
545         dm_task_destroy(waiter->dmt);
546         waiter->dmt = NULL;
547         waiter->event_nr++;
548
549         /*
550          * upon event ...
551          */
552         while (1) {
553                 condlog(3, "%s: devmap event #%i",
554                                 waiter->mapname, waiter->event_nr);
555
556                 /*
557                  * event might be :
558                  *
559                  * 1) a table reload, which means our mpp structure is
560                  *    obsolete : refresh it through update_multipath()
561                  * 2) a path failed by DM : mark as such through
562                  *    update_multipath()
563                  * 3) map has gone away : stop the thread.
564                  * 4) a path reinstate : nothing to do
565                  * 5) a switch group : nothing to do
566                  */
567                 pthread_cleanup_push(cleanup_lock, waiter->vecs->lock);
568                 lock(waiter->vecs->lock);
569                 r = update_multipath(waiter->vecs, waiter->mapname);
570                 lock_cleanup_pop(waiter->vecs->lock);
571
572                 if (r)
573                         return -1; /* stop the thread */
574
575                 event_nr = dm_geteventnr(waiter->mapname);
576
577                 if (waiter->event_nr == event_nr)
578                         return 1; /* upon problem reschedule 1s later */
579
580                 waiter->event_nr = event_nr;
581         }
582         return -1; /* never reach there */
583 }
584
585 static void *
586 waitevent (void * et)
587 {
588         int r;
589         struct event_thread *waiter;
590
591         mlockall(MCL_CURRENT | MCL_FUTURE);
592
593         waiter = (struct event_thread *)et;
594         pthread_cleanup_push(free_waiter, et);
595
596         while (1) {
597                 r = waiteventloop(waiter);
598
599                 if (r < 0)
600                         break;
601
602                 sleep(r);
603         }
604
605         pthread_cleanup_pop(1);
606         return NULL;
607 }
608
609 static int
610 start_waiter_thread (struct multipath * mpp, struct vectors * vecs)
611 {
612         pthread_attr_t attr;
613         struct event_thread * wp;
614
615         if (!mpp)
616                 return 0;
617
618         if (pthread_attr_init(&attr))
619                 goto out;
620
621         pthread_attr_setstacksize(&attr, 32 * 1024);
622         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
623
624         wp = alloc_waiter();
625
626         if (!wp)
627                 goto out;
628
629         mpp->waiter = (void *)wp;
630         strncpy(wp->mapname, mpp->alias, WWID_SIZE);
631         wp->vecs = vecs;
632
633         if (pthread_create(&wp->thread, &attr, waitevent, wp)) {
634                 condlog(0, "%s: cannot create event checker", wp->mapname);
635                 goto out1;
636         }
637         condlog(2, "%s: event checker started", wp->mapname);
638
639         return 0;
640 out1:
641         free_waiter(wp);
642         mpp->waiter = NULL;
643 out:
644         condlog(0, "failed to start waiter thread");
645         return 1;
646 }
647
648 int
649 uev_add_map (char * devname, struct vectors * vecs)
650 {
651         int major, minor;
652         char dev_t[BLK_DEV_SIZE];
653         char * alias;
654         struct multipath * mpp;
655
656         if (sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE))
657                 return 1;
658
659         if (sscanf(dev_t, "%d:%d", &major, &minor) != 2)
660                 return 1;
661
662         alias = dm_mapname(major, minor);
663                 
664         if (!alias)
665                 return 1;
666         
667         if (!dm_type(alias, DEFAULT_TARGET)) {
668                 condlog(4, "%s: not a multipath map", alias);
669                 FREE(alias);
670                 return 0;
671         }
672
673         mpp = find_mp(vecs->mpvec, alias);
674
675         if (mpp) {
676                 /*
677                  * this should not happen,
678                  * we missed a remove map event (not sent ?)
679                  */
680                 condlog(2, "%s: already registered", alias);
681                 remove_map(mpp, vecs);
682         }
683
684         /*
685          * now we can allocate
686          */
687         mpp = alloc_multipath();
688
689         if (!mpp)
690                 return 1;
691
692         mpp->minor = minor;
693         mpp->alias = alias;
694
695         if (setup_multipath(vecs, mpp))
696                 return 1; /* mpp freed in setup_multipath */
697
698         if (!vector_alloc_slot(vecs->mpvec))
699                 goto out;
700
701         vector_set_slot(vecs->mpvec, mpp);
702         adopt_paths(vecs, mpp);
703
704         if (start_waiter_thread(mpp, vecs))
705                 goto out;
706
707         return 0;
708 out:
709         condlog(2, "%s: add devmap failed", mpp->alias);
710         remove_map(mpp, vecs);
711         return 1;
712 }
713
714 int
715 uev_remove_map (char * devname, struct vectors * vecs)
716 {
717         int minor;
718         struct multipath * mpp;
719
720         if (sscanf(devname, "dm-%d", &minor) != 1)
721                 return 1;
722
723         mpp = find_mp_by_minor(vecs->mpvec, minor);
724
725         if (!mpp) {
726                 condlog(3, "%s: devmap not registered, can't remove",
727                         devname);
728                 return 1;
729         }
730
731         condlog(2, "remove %s devmap", mpp->alias);
732         remove_map(mpp, vecs);
733
734         return 0;
735 }
736
737 int
738 uev_add_path (char * devname, struct vectors * vecs)
739 {
740         struct path * pp;
741
742         pp = find_path_by_dev(vecs->pathvec, devname);
743
744         if (pp) {
745                 condlog(3, "%s: already in pathvec");
746                 return 1;
747         }
748         pp = store_pathinfo(vecs->pathvec, conf->hwtable,
749                        devname, DI_SYSFS | DI_WWID);
750
751         if (!pp) {
752                 condlog(0, "%s: failed to store path info", devname);
753                 return 1;
754         }
755
756         condlog(2, "%s: path checker registered", devname);
757         pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
758
759         if (pp->mpp) {
760                 condlog(4, "%s: ownership set to %s",
761                                 pp->dev_t, pp->mpp->alias);
762         } else {
763                 condlog(4, "%s: orphaned", pp->dev_t);
764                 orphan_path(pp);
765         }
766
767         return 0;
768 }
769
770 int
771 uev_remove_path (char * devname, struct vectors * vecs)
772 {
773         int i;
774         struct path * pp;
775
776         pp = find_path_by_dev(vecs->pathvec, devname);
777
778         if (!pp) {
779                 condlog(3, "%s: not in pathvec");
780                 return 1;
781         }
782
783         if (pp->mpp && pp->state == PATH_UP)
784                 update_queue_mode_del_path(pp->mpp);
785
786         condlog(2, "remove %s path checker", devname);
787         i = find_slot(vecs->pathvec, (void *)pp);
788         vector_del_slot(vecs->pathvec, i);
789         free_path(pp);
790
791         return 0;
792 }
793
794 int
795 show_paths (char ** r, int * len, struct vectors * vecs)
796 {
797         int i;
798         struct path * pp;
799         char * c;
800         char * reply;
801         struct path_layout pl;
802
803         get_path_layout(&pl, vecs->pathvec);
804         reply = MALLOC(MAX_REPLY_LEN);
805
806         if (!reply)
807                 return 1;
808
809         c = reply;
810         c += snprint_path_header(c, reply + MAX_REPLY_LEN - c,
811                                  PRINT_PATH_CHECKER, &pl);
812
813         vector_foreach_slot(vecs->pathvec, pp, i)
814                 c += snprint_path(c, reply + MAX_REPLY_LEN - c,
815                                   PRINT_PATH_CHECKER, pp, &pl);
816
817         *r = reply;
818         *len = (int)(c - reply + 1);
819         return 0;
820 }
821
822 int
823 show_maps (char ** r, int *len, struct vectors * vecs)
824 {
825         int i;
826         struct multipath * mpp;
827         char * c;
828         char * reply;
829         struct map_layout ml;
830
831         get_map_layout(&ml, vecs->mpvec);
832         reply = MALLOC(MAX_REPLY_LEN);
833
834         if (!reply)
835                 return 1;
836
837         c = reply;
838         c += snprint_map_header(c, reply + MAX_REPLY_LEN - c,
839                                 PRINT_MAP_FAILBACK, &ml);
840
841         vector_foreach_slot(vecs->mpvec, mpp, i)
842                 c += snprint_map(c, reply + MAX_REPLY_LEN - c,
843                                  PRINT_MAP_FAILBACK, mpp, &ml);
844
845         *r = reply;
846         *len = (int)(c - reply + 1);
847         return 0;
848 }
849
850 int
851 dump_pathvec (char ** r, int * len, struct vectors * vecs)
852 {
853         int i;
854         struct path * pp;
855         char * reply;
856         char * p;
857
858         *len = VECTOR_SIZE(vecs->pathvec) * sizeof(struct path);
859         reply = (char *)MALLOC(*len);
860         *r = reply;
861
862         if (!reply)
863                 return 1;
864
865         p = reply;
866
867         vector_foreach_slot (vecs->pathvec, pp, i) {
868                 memcpy((void *)p, pp, sizeof(struct path));
869                 p += sizeof(struct path);
870         }
871
872         /* return negative to hint caller not to add "ok" to the dump */
873         return -1;
874 }
875
876 static int
877 map_discovery (struct vectors * vecs)
878 {
879         int i;
880         struct multipath * mpp;
881
882         if (dm_get_maps(vecs->mpvec, "multipath"))
883                 return 1;
884
885         vector_foreach_slot (vecs->mpvec, mpp, i) {
886                 if (setup_multipath(vecs, mpp))
887                         return 1;
888                 mpp->minor = dm_get_minor(mpp->alias);
889                 start_waiter_thread(mpp, vecs);
890         }
891
892         return 0;
893 }
894
895 int
896 reconfigure (struct vectors * vecs)
897 {
898         struct config * old = conf;
899         struct multipath * mpp;
900         struct path * pp;
901         int i;
902
903         conf = NULL;
904
905         if (load_config(DEFAULT_CONFIGFILE)) {
906                 conf = old;
907                 condlog(2, "reconfigure failed, continue with old config");
908                 return 1;
909         }
910         conf->verbosity = old->verbosity;
911         free_config(old);
912
913         vector_foreach_slot (vecs->mpvec, mpp, i) {
914                 mpp->mpe = find_mpe(mpp->wwid);
915                 mpp->hwe = extract_hwe_from_path(mpp);
916                 adopt_paths(vecs, mpp);
917                 set_no_path_retry(mpp);
918         }
919         vector_foreach_slot (vecs->pathvec, pp, i) {
920                 select_checkfn(pp);
921                 select_getuid(pp);
922                 select_getprio(pp);
923         }
924         condlog(2, "reconfigured");
925         return 0;
926 }
927
928 int
929 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
930 {
931         struct vectors * vecs;
932         int r;
933         
934         *reply = NULL;
935         *len = 0;
936         vecs = (struct vectors *)trigger_data;
937
938         pthread_cleanup_push(cleanup_lock, vecs->lock);
939         lock(vecs->lock);
940
941         r = parse_cmd(str, reply, len, vecs);
942
943         if (r > 0) {
944                 *reply = STRDUP("fail\n");
945                 *len = strlen(*reply) + 1;
946                 r = 1;
947         }
948         else if (!r && *len == 0) {
949                 *reply = STRDUP("ok\n");
950                 *len = strlen(*reply) + 1;
951                 r = 0;
952         }
953         /* else if (r < 0) leave *reply alone */
954
955         lock_cleanup_pop(vecs->lock);
956         return r;
957 }
958
/*
 * Tell whether a uevent must be ignored.
 *
 * Only whole block devices are kept: the devpath must look like
 * "/block/<name>"; anything else, including "/block/<name>/<part>"
 * (a partition), is discarded.
 *
 * Returns 1 to discard the event, 0 to process it.
 */
static int
uev_discard(char * devpath)
{
        /*
         * a scanf field width does not count the terminating NUL:
         * a width of 10 needs room for 11 bytes
         */
        char a[11], b[11];

        /*
         * keep only block devices, discard partitions
         */
        if (sscanf(devpath, "/block/%10s", a) != 1 ||
            sscanf(devpath, "/block/%10[^/]/%10s", a, b) == 2) {
                condlog(4, "discard event on %s", devpath);
                return 1;
        }
        return 0;
}
974
975 int 
976 uev_trigger (struct uevent * uev, void * trigger_data)
977 {
978         int r = 0;
979         char devname[32];
980         struct vectors * vecs;
981
982         vecs = (struct vectors *)trigger_data;
983
984         if (uev_discard(uev->devpath))
985                 goto out;
986
987         basename(uev->devpath, devname);
988         lock(vecs->lock);
989
990         /*
991          * device map add/remove event
992          */
993         if (!strncmp(devname, "dm-", 3)) {
994                 if (!strncmp(uev->action, "add", 3)) {
995                         r = uev_add_map(devname, vecs);
996                         goto out;
997                 }
998 #if 0
999                 if (!strncmp(uev->action, "remove", 6)) {
1000                         r = uev_remove_map(devname, vecs);
1001                         goto out;
1002                 }
1003 #endif
1004                 goto out;
1005         }
1006         
1007         /*
1008          * path add/remove event
1009          */
1010         if (blacklist(conf->blist, devname))
1011                 goto out;
1012
1013         if (!strncmp(uev->action, "add", 3)) {
1014                 r = uev_add_path(devname, vecs);
1015                 goto out;
1016         }
1017         if (!strncmp(uev->action, "remove", 6)) {
1018                 r = uev_remove_path(devname, vecs);
1019                 goto out;
1020         }
1021
1022 out:
1023         unlock(vecs->lock);
1024         return r;
1025 }
1026
1027 static void *
1028 ueventloop (void * ap)
1029 {
1030         if (uevent_listen(&uev_trigger, ap))
1031                 fprintf(stderr, "error starting uevent listener");
1032                 
1033         return NULL;
1034 }
1035
1036 static void *
1037 uxlsnrloop (void * ap)
1038 {
1039         if (load_keys())
1040                 return NULL;
1041         
1042         if (alloc_handlers())
1043                 return NULL;
1044
1045         add_handler(LIST+PATHS, cli_list_paths);
1046         add_handler(LIST+MAPS, cli_list_maps);
1047         add_handler(ADD+PATH, cli_add_path);
1048         add_handler(DEL+PATH, cli_del_path);
1049         add_handler(ADD+MAP, cli_add_map);
1050         add_handler(DEL+MAP, cli_del_map);
1051         add_handler(SWITCH+MAP+GROUP, cli_switch_group);
1052         add_handler(DUMP+PATHVEC, cli_dump_pathvec);
1053         add_handler(RECONFIGURE, cli_reconfigure);
1054
1055         uxsock_listen(&uxsock_trigger, ap);
1056
1057         return NULL;
1058 }
1059
1060 static int
1061 exit_daemon (int status)
1062 {
1063         if (status != 0)
1064                 fprintf(stderr, "bad exit status. see daemon.log\n");
1065
1066         condlog(3, "unlink pidfile");
1067         unlink(DEFAULT_PIDFILE);
1068
1069         lock(&exit_mutex);
1070         pthread_cond_signal(&exit_cond);
1071         unlock(&exit_mutex);
1072
1073         return status;
1074 }
1075
1076 static void
1077 fail_path (struct path * pp)
1078 {
1079         if (!pp->mpp)
1080                 return;
1081
1082         condlog(2, "checker failed path %s in map %s",
1083                  pp->dev_t, pp->mpp->alias);
1084
1085         dm_fail_path(pp->mpp->alias, pp->dev_t);
1086         update_queue_mode_del_path(pp->mpp);
1087 }
1088
1089 /*
1090  * caller must have locked the path list before calling that function
1091  */
1092 static void
1093 reinstate_path (struct path * pp)
1094 {
1095         if (!pp->mpp)
1096                 return;
1097
1098         if (dm_reinstate(pp->mpp->alias, pp->dev_t))
1099                 condlog(0, "%s: reinstate failed", pp->dev_t);
1100         else {
1101                 condlog(2, "%s: reinstated", pp->dev_t);
1102                 update_queue_mode_add_path(pp->mpp);
1103         }
1104 }
1105
1106 static void
1107 enable_group(struct path * pp)
1108 {
1109         struct pathgroup * pgp;
1110
1111         /*
1112          * if path is added through uev_add_path, pgindex can be unset.
1113          * next update_strings() will set it, upon map reload event.
1114          *
1115          * we can safely return here, because upon map reload, all
1116          * PG will be enabled.
1117          */
1118         if (!pp->pgindex)
1119                 return;
1120
1121         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1122         
1123         if (pgp->status == PGSTATE_DISABLED) {
1124                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1125                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1126         }
1127 }
1128
1129 static void
1130 mpvec_garbage_collector (struct vectors * vecs)
1131 {
1132         struct multipath * mpp;
1133         int i;
1134
1135         vector_foreach_slot (vecs->mpvec, mpp, i) {
1136                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1137                         condlog(2, "%s: remove dead map", mpp->alias);
1138                         remove_map(mpp, vecs);
1139                         i--;
1140                 }
1141         }
1142 }
1143
1144 static void
1145 defered_failback_tick (vector mpvec)
1146 {
1147         struct multipath * mpp;
1148         int i;
1149
1150         vector_foreach_slot (mpvec, mpp, i) {
1151                 /*
1152                  * defered failback getting sooner
1153                  */
1154                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1155                         mpp->failback_tick--;
1156
1157                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1158                                 switch_pathgroup(mpp);
1159                 }
1160         }
1161 }
1162
1163 static void
1164 retry_count_tick(vector mpvec)
1165 {
1166         struct multipath *mpp;
1167         int i;
1168
1169         vector_foreach_slot (mpvec, mpp, i) {
1170                 if (mpp->retry_tick) {
1171                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1172                         if(--mpp->retry_tick == 0) {
1173                                 dm_queue_if_no_path(mpp->alias, 0);
1174                                 condlog(2, "%s: Disable queueing", mpp->alias);
1175                         }
1176                 }
1177         }
1178 }
1179
1180 static void *
1181 checkerloop (void *ap)
1182 {
1183         struct vectors *vecs;
1184         struct path *pp;
1185         int i, count = 0;
1186         int newstate;
1187         char checker_msg[MAX_CHECKER_MSG_SIZE];
1188
1189         mlockall(MCL_CURRENT | MCL_FUTURE);
1190
1191         memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
1192         vecs = (struct vectors *)ap;
1193
1194         condlog(2, "path checkers start up");
1195
1196         /*
1197          * init the path check interval
1198          */
1199         vector_foreach_slot (vecs->pathvec, pp, i) {
1200                 pp->checkint = conf->checkint;
1201         }
1202
1203         while (1) {
1204                 pthread_cleanup_push(cleanup_lock, vecs->lock);
1205                 lock(vecs->lock);
1206                 condlog(4, "tick");
1207
1208                 vector_foreach_slot (vecs->pathvec, pp, i) {
1209                         if (!pp->mpp)
1210                                 continue;
1211
1212                         if (pp->tick && --pp->tick)
1213                                 continue; /* don't check this path yet */
1214
1215                         /*
1216                          * provision a next check soonest,
1217                          * in case we exit abnormaly from here
1218                          */
1219                         pp->tick = conf->checkint;
1220                         
1221                         if (!pp->checkfn) {
1222                                 pathinfo(pp, conf->hwtable, DI_SYSFS);
1223                                 select_checkfn(pp);
1224                         }
1225
1226                         if (!pp->checkfn) {
1227                                 condlog(0, "%s: checkfn is void", pp->dev);
1228                                 continue;
1229                         }
1230                         newstate = pp->checkfn(pp->fd, checker_msg,
1231                                                &pp->checker_context);
1232                         
1233                         if (newstate != pp->state) {
1234                                 pp->state = newstate;
1235                                 LOG_MSG(1, checker_msg);
1236
1237                                 /*
1238                                  * upon state change, reset the checkint
1239                                  * to the shortest delay
1240                                  */
1241                                 pp->checkint = conf->checkint;
1242
1243                                 if (newstate == PATH_DOWN ||
1244                                     newstate == PATH_SHAKY) {
1245                                         /*
1246                                          * proactively fail path in the DM
1247                                          */
1248                                         fail_path(pp);
1249
1250                                         /*
1251                                          * cancel scheduled failback
1252                                          */
1253                                         pp->mpp->failback_tick = 0;
1254
1255                                         continue;
1256                                 }
1257
1258                                 /*
1259                                  * reinstate this path
1260                                  */
1261                                 reinstate_path(pp);
1262
1263                                 /*
1264                                  * need to switch group ?
1265                                  */
1266                                 update_multipath_strings(pp->mpp,
1267                                                          vecs->pathvec);
1268
1269                                 /*
1270                                  * schedule defered failback
1271                                  */
1272                                 if (pp->mpp->pgfailback > 0)
1273                                         pp->mpp->failback_tick =
1274                                                 pp->mpp->pgfailback + 1;
1275                                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
1276                                     need_switch_pathgroup(pp->mpp, 1))
1277                                         switch_pathgroup(pp->mpp);
1278
1279                                 /*
1280                                  * if at least one path is up in a group, and
1281                                  * the group is disabled, re-enable it
1282                                  */
1283                                 if (newstate == PATH_UP)
1284                                         enable_group(pp);
1285                         }
1286                         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1287                                 LOG_MSG(4, checker_msg);
1288                                 /*
1289                                  * double the next check delay.
1290                                  * max at conf->max_checkint
1291                                  */
1292                                 if (pp->checkint < (conf->max_checkint / 2))
1293                                         pp->checkint = 2 * pp->checkint;
1294                                 else
1295                                         pp->checkint = conf->max_checkint;
1296
1297                                 pp->tick = pp->checkint;
1298                                 condlog(4, "%s: delay next check %is",
1299                                                 pp->dev_t, pp->tick);
1300
1301                         }
1302                         pp->state = newstate;
1303
1304                         /*
1305                          * path prio refreshing
1306                          */
1307                         condlog(4, "path prio refresh");
1308                         pathinfo(pp, conf->hwtable, DI_PRIO);
1309
1310                         if (need_switch_pathgroup(pp->mpp, 0)) {
1311                                 if (pp->mpp->pgfailback > 0)
1312                                         pp->mpp->failback_tick =
1313                                                 pp->mpp->pgfailback + 1;
1314                                 else if (pp->mpp->pgfailback ==
1315                                                 -FAILBACK_IMMEDIATE)
1316                                         switch_pathgroup(pp->mpp);
1317                         }
1318                 }
1319                 defered_failback_tick(vecs->mpvec);
1320                 retry_count_tick(vecs->mpvec);
1321
1322                 if (count)
1323                         count--;
1324                 else {
1325                         condlog(4, "map garbage collection");
1326                         mpvec_garbage_collector(vecs);
1327                         count = MAPGCINT;
1328                 }
1329                 
1330                 lock_cleanup_pop(vecs->lock);
1331                 sleep(1);
1332         }
1333         return NULL;
1334 }
1335
1336 static struct vectors *
1337 init_paths (void)
1338 {
1339         struct vectors * vecs;
1340
1341         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1342
1343         if (!vecs)
1344                 return NULL;
1345
1346         vecs->lock = 
1347                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1348
1349         if (!vecs->lock)
1350                 goto out;
1351
1352         vecs->pathvec = vector_alloc();
1353
1354         if (!vecs->pathvec)
1355                 goto out1;
1356                 
1357         vecs->mpvec = vector_alloc();
1358
1359         if (!vecs->mpvec)
1360                 goto out2;
1361         
1362         pthread_mutex_init(vecs->lock, NULL);
1363
1364         return vecs;
1365
1366 out2:
1367         vector_free(vecs->pathvec);
1368 out1:
1369         FREE(vecs->lock);
1370 out:
1371         FREE(vecs);
1372         condlog(0, "failed to init paths");
1373         return NULL;
1374 }
1375
1376 static void *
1377 signal_set(int signo, void (*func) (int))
1378 {
1379         int r;
1380         struct sigaction sig;
1381         struct sigaction osig;
1382
1383         sig.sa_handler = func;
1384         sigemptyset(&sig.sa_mask);
1385         sig.sa_flags = 0;
1386
1387         r = sigaction(signo, &sig, &osig);
1388
1389         if (r < 0)
1390                 return (SIG_ERR);
1391         else
1392                 return (osig.sa_handler);
1393 }
1394
/*
 * SIGHUP handler : log the reception, and in _DEBUG_ builds call
 * dbg_free_final().
 */
static void
sighup (int sig)
{
        (void)sig;      /* the signal number is not needed */

        condlog(3, "SIGHUP received");

#ifdef _DEBUG_
        dbg_free_final(NULL);
#endif
}
1404
/*
 * termination signal handler : start the daemon exit sequence
 * with a zero status.
 */
static void
sigend (int sig)
{
        (void)sig;      /* the signal number is not needed */

        (void)exit_daemon(0);
}
1410
1411 static void
1412 signal_init(void)
1413 {
1414         signal_set(SIGHUP, sighup);
1415         signal_set(SIGINT, sigend);
1416         signal_set(SIGTERM, sigend);
1417         signal_set(SIGKILL, sigend);
1418 }
1419
/*
 * switch the calling process to the SCHED_RR policy at priority 99.
 * a failure is only logged.
 */
static void
setscheduler (void)
{
        static struct sched_param rr_param = {
                .sched_priority = 99
        };

        /*
         * NOTE(review): LOG_WARNING is passed where other condlog()
         * calls in this file pass a small verbosity number -- confirm
         * this is the intended level
         */
        if (sched_setscheduler(0, SCHED_RR, &rr_param) == -1)
                condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1434
/*
 * write val into /proc/self/oom_adj.
 *
 * best effort : nothing happens if the file can not be opened,
 * and write errors are not reported.
 */
static void
set_oom_adj (int val)
{
        FILE *oom = fopen("/proc/self/oom_adj", "w");

        if (oom) {
                fprintf(oom, "%i", val);
                fclose(oom);
        }
}
1448         
1449 static int
1450 child (void * param)
1451 {
1452         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1453         pthread_attr_t attr;
1454         struct vectors * vecs;
1455
1456         mlockall(MCL_CURRENT | MCL_FUTURE);
1457
1458         if (logsink)
1459                 log_thread_start();
1460
1461         condlog(2, "--------start up--------");
1462         condlog(2, "read " DEFAULT_CONFIGFILE);
1463
1464         if (load_config(DEFAULT_CONFIGFILE))
1465                 exit(1);
1466
1467         setlogmask(LOG_UPTO(conf->verbosity + 3));
1468
1469         /*
1470          * fill the voids left in the config file
1471          */
1472         if (!conf->checkint) {
1473                 conf->checkint = CHECKINT;
1474                 conf->max_checkint = MAX_CHECKINT;
1475         }
1476
1477         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1478                 if (logsink)
1479                         log_thread_stop();
1480
1481                 exit(1);
1482         }
1483         signal_init();
1484         setscheduler();
1485         set_oom_adj(-17);
1486         vecs = init_paths();
1487
1488         if (!vecs)
1489                 exit(1);
1490
1491         if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
1492                 condlog(0, "can not find sysfs mount point");
1493                 exit(1);
1494         }
1495
1496         /*
1497          * fetch paths and multipaths lists
1498          * no paths and/or no multipaths are valid scenarii
1499          * vectors maintenance will be driven by events
1500          */
1501         path_discovery(vecs->pathvec, conf, DI_SYSFS | DI_WWID | DI_CHECKER);
1502         map_discovery(vecs);
1503
1504         /*
1505          * start threads
1506          */
1507         pthread_attr_init(&attr);
1508         pthread_attr_setstacksize(&attr, 64 * 1024);
1509         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
1510         
1511         pthread_create(&check_thr, &attr, checkerloop, vecs);
1512         pthread_create(&uevent_thr, &attr, ueventloop, vecs);
1513         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, vecs);
1514
1515         pthread_cond_wait(&exit_cond, &exit_mutex);
1516
1517         /*
1518          * exit path
1519          */
1520         lock(vecs->lock);
1521         remove_maps(vecs);
1522         free_pathvec(vecs->pathvec, FREE_PATHS);
1523
1524         pthread_cancel(check_thr);
1525         pthread_cancel(uevent_thr);
1526         pthread_cancel(uxlsnr_thr);
1527
1528         free_keys(keys);
1529         keys = NULL;
1530         free_handlers(handlers);
1531         handlers = NULL;
1532         free_polls();
1533
1534         unlock(vecs->lock);
1535         pthread_mutex_destroy(vecs->lock);
1536         FREE(vecs->lock);
1537         vecs->lock = NULL;
1538         FREE(vecs);
1539         vecs = NULL;
1540         free_config(conf);
1541         conf = NULL;
1542
1543         condlog(2, "--------shut down-------");
1544         
1545         if (logsink)
1546                 log_thread_stop();
1547
1548 #ifdef _DEBUG_
1549         dbg_free_final(NULL);
1550 #endif
1551
1552         exit(0);
1553 }
1554
1555 int
1556 main (int argc, char *argv[])
1557 {
1558         extern char *optarg;
1559         extern int optind;
1560         int arg;
1561         int err;
1562         
1563         logsink = 1;
1564
1565         if (getuid() != 0) {
1566                 fprintf(stderr, "need to be root\n");
1567                 exit(1);
1568         }
1569
1570         /* make sure we don't lock any path */
1571         chdir("/");
1572         umask(umask(077) | 022);
1573
1574         conf = alloc_config();
1575
1576         if (!conf)
1577                 exit(1);
1578
1579         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1580         switch(arg) {
1581                 case 'd':
1582                         logsink = 0;
1583                         //debug=1; /* ### comment me out ### */
1584                         break;
1585                 case 'v':
1586                         if (sizeof(optarg) > sizeof(char *) ||
1587                             !isdigit(optarg[0]))
1588                                 exit(1);
1589
1590                         conf->verbosity = atoi(optarg);
1591                         break;
1592                 case 'k':
1593                         uxclnt(optarg);
1594                         exit(0);
1595                 default:
1596                         ;
1597                 }
1598         }
1599
1600         if (!logsink)
1601                 err = 0;
1602         else
1603                 err = fork();
1604         
1605         if (err < 0)
1606                 /* error */
1607                 exit(1);
1608         else if (err > 0)
1609                 /* parent dies */
1610                 exit(0);
1611         else
1612                 /* child lives */
1613                 return (child(NULL));
1614 }