Remove DAEMON defines
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17
18 /*
19  * libcheckers
20  */
21 #include <checkers.h>
22
23 /*
24  * libmultipath
25  */
26 #include <parser.h>
27 #include <vector.h>
28 #include <memory.h>
29 #include <config.h>
30 #include <util.h>
31 #include <hwtable.h>
32 #include <defaults.h>
33 #include <structs.h>
34 #include <callout.h>
35 #include <blacklist.h>
36 #include <structs_vec.h>
37 #include <dmparser.h>
38 #include <devmapper.h>
39 #include <sysfs.h>
40 #include <dict.h>
41 #include <discovery.h>
42 #include <debug.h>
43 #include <propsel.h>
44 #include <uevent.h>
45 #include <switchgroup.h>
46 #include <print.h>
47 #include <configure.h>
48 #include <prio.h>
49
50 #include "main.h"
51 #include "pidfile.h"
52 #include "uxlsnr.h"
53 #include "uxclnt.h"
54 #include "cli.h"
55 #include "cli_handlers.h"
56 #include "lock.h"
57 #include "waiter.h"
58
#define FILE_NAME_SIZE 256
#define CMDSIZE 160

/*
 * Log a checker message b at verbosity a, prefixed with the device name
 * of the path being checked. NULL or empty messages are skipped.
 *
 * Relies on a `struct path * pp` being in scope at the call site.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement (safe as the body of an unbraced if/else), and b is
 * evaluated only once.
 */
#define LOG_MSG(a,b) \
        do { \
                const char * log_msg_ = (b); \
                if (log_msg_ && log_msg_[0] != '\0') \
                        condlog(a, "%s: %s", pp->dev, log_msg_); \
        } while (0)

/*
 * exit_cond is signalled by exit_daemon() while holding exit_mutex
 */
pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;

int logsink;

/*
 * global copy of vecs for use in sig handlers
 */
struct vectors * gvecs;
74
75 static int
76 need_switch_pathgroup (struct multipath * mpp, int refresh)
77 {
78         struct pathgroup * pgp;
79         struct path * pp;
80         unsigned int i, j;
81
82         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
83                 return 0;
84
85         /*
86          * Refresh path priority values
87          */
88         if (refresh)
89                 vector_foreach_slot (mpp->pg, pgp, i)
90                         vector_foreach_slot (pgp->paths, pp, j)
91                                 pathinfo(pp, conf->hwtable, DI_PRIO);
92
93         mpp->bestpg = select_path_group(mpp);
94
95         if (mpp->bestpg != mpp->nextpg)
96                 return 1;
97
98         return 0;
99 }
100
101 static void
102 switch_pathgroup (struct multipath * mpp)
103 {
104         mpp->stat_switchgroup++;
105         dm_switchgroup(mpp->alias, mpp->bestpg);
106         condlog(2, "%s: switch to path group #%i",
107                  mpp->alias, mpp->bestpg);
108 }
109
110 static int
111 coalesce_maps(struct vectors *vecs, vector nmpv)
112 {
113         struct multipath * ompp;
114         vector ompv = vecs->mpvec;
115         unsigned int i;
116         int j;
117
118         vector_foreach_slot (ompv, ompp, i) {
119                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
120                         /*
121                          * remove all current maps not allowed by the
122                          * current configuration
123                          */
124                         if (dm_flush_map(ompp->alias, DEFAULT_TARGET)) {
125                                 condlog(0, "%s: unable to flush devmap",
126                                         ompp->alias);
127                                 /*
128                                  * may be just because the device is open
129                                  */
130                                 if (!vector_alloc_slot(nmpv))
131                                         return 1;
132
133                                 vector_set_slot(nmpv, ompp);
134                                 setup_multipath(vecs, ompp);
135
136                                 if ((j = find_slot(ompv, (void *)ompp)) != -1)
137                                         vector_del_slot(ompv, j);
138
139                                 continue;
140                         }
141                         else {
142                                 dm_lib_release();
143                                 condlog(3, "%s devmap removed", ompp->alias);
144                         }
145                 }
146         }
147         return 0;
148 }
149
150 static void
151 sync_map_state(struct multipath *mpp)
152 {
153         struct pathgroup *pgp;
154         struct path *pp;
155         unsigned int i, j;
156
157         vector_foreach_slot (mpp->pg, pgp, i){
158                 vector_foreach_slot (pgp->paths, pp, j){
159                         if (pp->state <= PATH_UNCHECKED)
160                                 continue;
161                         if ((pp->dmstate == PSTATE_FAILED ||
162                              pp->dmstate == PSTATE_UNDEF) &&
163                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
164                                 dm_reinstate_path(mpp->alias, pp->dev_t);
165                         else if ((pp->dmstate == PSTATE_ACTIVE ||
166                                   pp->dmstate == PSTATE_UNDEF) &&
167                                  (pp->state == PATH_DOWN ||
168                                   pp->state == PATH_SHAKY))
169                                 dm_fail_path(mpp->alias, pp->dev_t);
170                 }
171         }
172 }
173
174 static void
175 sync_maps_state(vector mpvec)
176 {
177         unsigned int i;
178         struct multipath *mpp;
179
180         vector_foreach_slot (mpvec, mpp, i)
181                 sync_map_state(mpp);
182 }
183
184 static int
185 flush_map(struct multipath * mpp, struct vectors * vecs)
186 {
187         /*
188          * clear references to this map before flushing so we can ignore
189          * the spurious uevent we may generate with the dm_flush_map call below
190          */
191         if (dm_flush_map(mpp->alias, DEFAULT_TARGET)) {
192                 /*
193                  * May not really be an error -- if the map was already flushed
194                  * from the device mapper by dmsetup(8) for instance.
195                  */
196                 condlog(0, "%s: can't flush", mpp->alias);
197                 return 1;
198         }
199         else {
200                 dm_lib_release();
201                 condlog(3, "%s: devmap removed", mpp->alias);
202         }
203
204         orphan_paths(vecs->pathvec, mpp);
205         remove_map_and_stop_waiter(mpp, vecs, 1);
206
207         return 0;
208 }
209
210 static int
211 uev_add_map (struct sysfs_device * dev, struct vectors * vecs)
212 {
213         condlog(2, "%s: add map (uevent)", dev->kernel);
214         return ev_add_map(dev, vecs);
215 }
216
217 int
218 ev_add_map (struct sysfs_device * dev, struct vectors * vecs)
219 {
220         char * alias;
221         char *dev_t;
222         int major, minor;
223         char * refwwid;
224         struct multipath * mpp;
225         int map_present;
226         int r = 1;
227
228         dev_t = sysfs_attr_get_value(dev->devpath, "dev");
229
230         if (!dev_t || sscanf(dev_t, "%d:%d", &major, &minor) != 2)
231                 return 1;
232
233         alias = dm_mapname(major, minor);
234
235         if (!alias)
236                 return 1;
237
238         map_present = dm_map_present(alias);
239
240         if (map_present && dm_type(alias, DEFAULT_TARGET) <= 0) {
241                 condlog(4, "%s: not a multipath map", alias);
242                 return 0;
243         }
244
245         mpp = find_mp_by_alias(vecs->mpvec, alias);
246
247         if (mpp) {
248                 /*
249                  * Not really an error -- we generate our own uevent
250                  * if we create a multipath mapped device as a result
251                  * of uev_add_path
252                  */
253                 condlog(0, "%s: devmap already registered",
254                         dev->kernel);
255                 return 0;
256         }
257
258         /*
259          * now we can register the map
260          */
261         if (map_present && (mpp = add_map_without_path(vecs, minor, alias))) {
262                 sync_map_state(mpp);
263                 condlog(3, "%s: devmap %s added", alias, dev->kernel);
264                 return 0;
265         }
266         refwwid = get_refwwid(dev->kernel, DEV_DEVMAP, vecs->pathvec);
267
268         if (refwwid) {
269                 r = coalesce_paths(vecs, NULL, refwwid);
270                 dm_lib_release();
271         }
272
273         if (!r)
274                 condlog(3, "%s: devmap %s added", alias, dev->kernel);
275         else
276                 condlog(0, "%s: uev_add_map %s failed", alias, dev->kernel);
277
278         FREE(refwwid);
279         return r;
280 }
281
282 static int
283 uev_remove_map (struct sysfs_device * dev, struct vectors * vecs)
284 {
285         condlog(2, "%s: remove map (uevent)", dev->kernel);
286         return ev_remove_map(dev->kernel, vecs);
287 }
288
289 int
290 ev_remove_map (char * devname, struct vectors * vecs)
291 {
292         struct multipath * mpp;
293
294         mpp = find_mp_by_str(vecs->mpvec, devname);
295
296         if (!mpp) {
297                 condlog(3, "%s: devmap not registered, can't remove",
298                         devname);
299                 return 0;
300         }
301         flush_map(mpp, vecs);
302
303         return 0;
304 }
305
306 static int
307 uev_umount_map (struct sysfs_device * dev, struct vectors * vecs)
308 {
309         struct multipath * mpp;
310
311         condlog(2, "%s: umount map (uevent)", dev->kernel);
312
313         mpp = find_mp_by_str(vecs->mpvec, dev->kernel);
314
315         if (!mpp)
316                 return 0;
317
318         update_mpp_paths(mpp, vecs->pathvec);
319         verify_paths(mpp, vecs, NULL);
320
321         if (!VECTOR_SIZE(mpp->paths))
322                 flush_map(mpp, vecs);
323
324         return 0;
325 }
326
327 static int
328 uev_add_path (struct sysfs_device * dev, struct vectors * vecs)
329 {
330         condlog(2, "%s: add path (uevent)", dev->kernel);
331         return (ev_add_path(dev->kernel, vecs) != 1)? 0 : 1;
332 }
333
334
/*
 * Add the path device named devname and push the resulting map to the
 * device-mapper: the path joins the existing map that has the same wwid,
 * or a new map is created around it.
 *
 * returns:
 * 0: added (or already part of a map)
 * 1: error
 * 2: blacklisted
 */
int
ev_add_path (char * devname, struct vectors * vecs)
{
        struct multipath * mpp;
        struct path * pp;
        char empty_buff[WWID_SIZE] = {0};

        pp = find_path_by_dev(vecs->pathvec, devname);

        if (pp) {
                condlog(0, "%s: spurious uevent, path already in pathvec",
                        devname);
                /*
                 * already attached to a map: nothing to do.
                 * a known path without a map falls through and is
                 * processed like a new one.
                 */
                if (pp->mpp)
                        return 0;
        }
        else {
                /*
                 * get path vital state
                 */
                if (!(pp = store_pathinfo(vecs->pathvec, conf->hwtable,
                      devname, DI_ALL))) {
                        condlog(0, "%s: failed to store path info", devname);
                        return 1;
                }
                pp->checkint = conf->checkint;
        }

        /*
         * need path UID to go any further
         * (an all-zero wwid buffer means no UID was obtained)
         */
        if (memcmp(empty_buff, pp->wwid, WWID_SIZE) == 0) {
                condlog(0, "%s: failed to get path uid", devname);
                return 1; /* leave path added to pathvec */
        }
        /*
         * filtered paths are removed from pathvec and freed
         */
        if (filter_path(conf, pp) > 0){
                int i = find_slot(vecs->pathvec, (void *)pp);
                if (i != -1)
                        vector_del_slot(vecs->pathvec, i);
                free_path(pp);
                return 2;
        }
        mpp = pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
        /*
         * re-entered from the domap failure handling below,
         * with mpp still set to the map being reloaded
         */
rescan:
        if (mpp) {
                /*
                 * a map with this wwid exists: reload it
                 */
                if (adopt_paths(vecs->pathvec, mpp))
                        return 1; /* leave path added to pathvec */

                verify_paths(mpp, vecs, NULL);
                mpp->action = ACT_RELOAD;
        }
        else {
                /*
                 * no map yet for this wwid: create one
                 */
                if ((mpp = add_map_with_path(vecs, pp, 1)))
                        mpp->action = ACT_CREATE;
                else
                        return 1; /* leave path added to pathvec */
        }

        /*
         * push the map to the device-mapper
         */
        if (setup_map(mpp)) {
                condlog(0, "%s: failed to setup map for addition of new "
                        "path %s", mpp->alias, devname);
                goto out;
        }
        /*
         * reload the map for the multipath mapped device
         */
        if (domap(mpp) <= 0) {
                condlog(0, "%s: failed in domap for addition of new "
                        "path %s", mpp->alias, devname);
                /*
                 * deal with asynchronous uevents :((
                 * a failed reload is retried after a one second sleep;
                 * NOTE(review): there is no bound on the number of retries
                 */
                if (mpp->action == ACT_RELOAD) {
                        condlog(0, "%s: uev_add_path sleep", mpp->alias);
                        sleep(1);
                        update_mpp_paths(mpp, vecs->pathvec);
                        goto rescan;
                }
                else
                        goto out;
        }
        dm_lib_release();

        /*
         * update our state from kernel regardless of create or reload
         */
        if (setup_multipath(vecs, mpp))
                goto out;

        sync_map_state(mpp);

        /*
         * only newly created maps get a waiter thread started here
         */
        if (mpp->action == ACT_CREATE &&
            start_waiter_thread(mpp, vecs))
                        goto out;

        condlog(3, "%s path added to devmap %s", devname, mpp->alias);
        return 0;

        /*
         * failure after the map was set up: drop the map
         */
out:
        remove_map(mpp, vecs, 1);
        return 1;
}
445
446 static int
447 uev_remove_path (struct sysfs_device * dev, struct vectors * vecs)
448 {
449         int retval;
450
451         condlog(2, "%s: remove path (uevent)", dev->kernel);
452         retval = ev_remove_path(dev->kernel, vecs);
453         if (!retval)
454                 sysfs_device_put(dev);
455
456         return retval;
457 }
458
459 int
460 ev_remove_path (char * devname, struct vectors * vecs)
461 {
462         struct multipath * mpp;
463         struct path * pp;
464         int i;
465         int rm_path = 1;
466
467         pp = find_path_by_dev(vecs->pathvec, devname);
468
469         if (!pp) {
470                 condlog(0, "%s: spurious uevent, path not in pathvec", devname);
471                 return 1;
472         }
473
474         /*
475          * avoid referring to the map of an orphanned path
476          */
477         if ((mpp = pp->mpp)) {
478
479                 /*
480                  * remove the map IFF removing the last path
481                  */
482                 if (pathcount(mpp, PATH_WILD) > 1) {
483                         vector rpvec = vector_alloc();
484
485                         /*
486                          * transform the mp->pg vector of vectors of paths
487                          * into a mp->params string to feed the device-mapper
488                          */
489                         update_mpp_paths(mpp, vecs->pathvec);
490                         if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
491                                 vector_del_slot(mpp->paths, i);
492
493                         if (VECTOR_SIZE(mpp->paths) == 0) {
494                                 char alias[WWID_SIZE];
495
496                                 /*
497                                  * flush_map will fail if the device is open
498                                  */
499                                 strncpy(alias, mpp->alias, WWID_SIZE);
500                                 if (flush_map(mpp, vecs))
501                                         rm_path = 0;
502                                 else
503                                         condlog(3, "%s: removed map after removing"
504                                                 " multiple paths", alias);
505                         }
506                         else {
507                                 if (setup_map(mpp)) {
508                                         condlog(0, "%s: failed to setup map for"
509                                                 " removal of path %s", mpp->alias, devname);
510                                         free_pathvec(rpvec, KEEP_PATHS);
511                                         goto out;
512                                 }
513                                 /*
514                                  * reload the map
515                                  */
516                                 mpp->action = ACT_RELOAD;
517                                 if (domap(mpp) <= 0) {
518                                         condlog(0, "%s: failed in domap for "
519                                                 "removal of path %s",
520                                                 mpp->alias, devname);
521                                         /*
522                                          * Delete path from pathvec so that
523                                          * update_mpp_paths wont find it later
524                                          * when/if another path is removed.
525                                          */
526                                         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
527                                                 vector_del_slot(vecs->pathvec, i);
528                                         free_path(pp);
529                                         return 1;
530                                 }
531                                 /*
532                                  * update our state from kernel
533                                  */
534                                 if (setup_multipath(vecs, mpp)) {
535                                         free_pathvec(rpvec, KEEP_PATHS);
536                                         goto out;
537                                 }
538                                 sync_map_state(mpp);
539
540                                 condlog(3, "%s: path removed from map %s",
541                                         devname, mpp->alias);
542                         }
543                         free_pathvec(rpvec, KEEP_PATHS);
544                 }
545                 else {
546                         char alias[WWID_SIZE];
547
548                         /*
549                          * flush_map will fail if the device is open
550                          */
551                         strncpy(alias, mpp->alias, WWID_SIZE);
552                         if (flush_map(mpp, vecs))
553                                 rm_path = 0;
554                         else
555                                 condlog(3, "%s: removed map", alias);
556                 }
557         }
558
559         if (rm_path) {
560                 if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
561                         vector_del_slot(vecs->pathvec, i);
562                 free_path(pp);
563         }
564
565         return 0;
566
567 out:
568         remove_map_and_stop_waiter(mpp, vecs, 1);
569         return 1;
570 }
571
572 static int
573 map_discovery (struct vectors * vecs)
574 {
575         struct multipath * mpp;
576         unsigned int i;
577
578         if (dm_get_maps(vecs->mpvec, "multipath"))
579                 return 1;
580
581         vector_foreach_slot (vecs->mpvec, mpp, i)
582                 if (setup_multipath(vecs, mpp))
583                         return 1;
584
585         return 0;
586 }
587
588 int
589 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
590 {
591         struct vectors * vecs;
592         int r;
593
594         *reply = NULL;
595         *len = 0;
596         vecs = (struct vectors *)trigger_data;
597
598         pthread_cleanup_push(cleanup_lock, vecs->lock);
599         lock(vecs->lock);
600
601         r = parse_cmd(str, reply, len, vecs);
602
603         if (r > 0) {
604                 *reply = STRDUP("fail\n");
605                 *len = strlen(*reply) + 1;
606                 r = 1;
607         }
608         else if (!r && *len == 0) {
609                 *reply = STRDUP("ok\n");
610                 *len = strlen(*reply) + 1;
611                 r = 0;
612         }
613         /* else if (r < 0) leave *reply alone */
614
615         lock_cleanup_pop(vecs->lock);
616         return r;
617 }
618
/*
 * Tell whether the uevent on devpath should be ignored.
 *
 * returns:
 * 1: discard (not under /block/, or has a second path component)
 * 0: keep
 */
static int
uev_discard(char * devpath)
{
        /*
         * the conversions below store up to 10 characters plus the
         * terminating NUL, so the buffers need 11 bytes
         */
        char a[11], b[11];

        /*
         * keep only block devices, discard partitions
         */
        if (sscanf(devpath, "/block/%10s", a) != 1 ||
            sscanf(devpath, "/block/%10[^/]/%10s", a, b) == 2) {
                condlog(4, "discard event on %s", devpath);
                return 1;
        }
        return 0;
}
634
635 int
636 uev_trigger (struct uevent * uev, void * trigger_data)
637 {
638         int r = 0;
639         struct sysfs_device *sysdev;
640         struct vectors * vecs;
641
642         vecs = (struct vectors *)trigger_data;
643
644         if (uev_discard(uev->devpath))
645                 return 0;
646
647         sysdev = sysfs_device_get(uev->devpath);
648         if(!sysdev)
649                 return 0;
650
651         lock(vecs->lock);
652
653         /*
654          * device map event
655          * Add events are ignored here as the tables
656          * are not fully initialised then.
657          */
658         if (!strncmp(sysdev->kernel, "dm-", 3)) {
659                 if (!strncmp(uev->action, "change", 6)) {
660                         r = uev_add_map(sysdev, vecs);
661                         goto out;
662                 }
663                 if (!strncmp(uev->action, "remove", 6)) {
664                         r = uev_remove_map(sysdev, vecs);
665                         goto out;
666                 }
667                 if (!strncmp(uev->action, "umount", 6)) {
668                         r = uev_umount_map(sysdev, vecs);
669                         goto out;
670                 }
671                 goto out;
672         }
673
674         /*
675          * path add/remove event
676          */
677         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
678                            sysdev->kernel) > 0)
679                 goto out;
680
681         if (!strncmp(uev->action, "add", 3)) {
682                 r = uev_add_path(sysdev, vecs);
683                 goto out;
684         }
685         if (!strncmp(uev->action, "remove", 6)) {
686                 r = uev_remove_path(sysdev, vecs);
687                 goto out;
688         }
689
690 out:
691         unlock(vecs->lock);
692         return r;
693 }
694
695 static void *
696 ueventloop (void * ap)
697 {
698         if (uevent_listen(&uev_trigger, ap))
699                 fprintf(stderr, "error starting uevent listener");
700
701         return NULL;
702 }
703
704 static void *
705 uxlsnrloop (void * ap)
706 {
707         if (cli_init())
708                 return NULL;
709
710         set_handler_callback(LIST+PATHS, cli_list_paths);
711         set_handler_callback(LIST+MAPS, cli_list_maps);
712         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
713         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
714         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
715         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
716         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
717         set_handler_callback(LIST+CONFIG, cli_list_config);
718         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
719         set_handler_callback(LIST+DEVICES, cli_list_devices);
720         set_handler_callback(ADD+PATH, cli_add_path);
721         set_handler_callback(DEL+PATH, cli_del_path);
722         set_handler_callback(ADD+MAP, cli_add_map);
723         set_handler_callback(DEL+MAP, cli_del_map);
724         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
725         set_handler_callback(RECONFIGURE, cli_reconfigure);
726         set_handler_callback(SUSPEND+MAP, cli_suspend);
727         set_handler_callback(RESUME+MAP, cli_resume);
728         set_handler_callback(REINSTATE+PATH, cli_reinstate);
729         set_handler_callback(FAIL+PATH, cli_fail);
730
731         uxsock_listen(&uxsock_trigger, ap);
732
733         return NULL;
734 }
735
736 static int
737 exit_daemon (int status)
738 {
739         if (status != 0)
740                 fprintf(stderr, "bad exit status. see daemon.log\n");
741
742         condlog(3, "unlink pidfile");
743         unlink(DEFAULT_PIDFILE);
744
745         lock(&exit_mutex);
746         pthread_cond_signal(&exit_cond);
747         unlock(&exit_mutex);
748
749         return status;
750 }
751
752 static void
753 fail_path (struct path * pp, int del_active)
754 {
755         if (!pp->mpp)
756                 return;
757
758         condlog(2, "checker failed path %s in map %s",
759                  pp->dev_t, pp->mpp->alias);
760
761         dm_fail_path(pp->mpp->alias, pp->dev_t);
762         if (del_active)
763                 update_queue_mode_del_path(pp->mpp);
764 }
765
766 /*
767  * caller must have locked the path list before calling that function
768  */
769 static void
770 reinstate_path (struct path * pp, int add_active)
771 {
772         if (!pp->mpp)
773                 return;
774
775         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
776                 condlog(0, "%s: reinstate failed", pp->dev_t);
777         else {
778                 condlog(2, "%s: reinstated", pp->dev_t);
779                 if (add_active)
780                         update_queue_mode_add_path(pp->mpp);
781         }
782 }
783
784 static void
785 enable_group(struct path * pp)
786 {
787         struct pathgroup * pgp;
788
789         /*
790          * if path is added through uev_add_path, pgindex can be unset.
791          * next update_strings() will set it, upon map reload event.
792          *
793          * we can safely return here, because upon map reload, all
794          * PG will be enabled.
795          */
796         if (!pp->mpp->pg || !pp->pgindex)
797                 return;
798
799         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
800
801         if (pgp->status == PGSTATE_DISABLED) {
802                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
803                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
804         }
805 }
806
807 static void
808 mpvec_garbage_collector (struct vectors * vecs)
809 {
810         struct multipath * mpp;
811         unsigned int i;
812
813         vector_foreach_slot (vecs->mpvec, mpp, i) {
814                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
815                         condlog(2, "%s: remove dead map", mpp->alias);
816                         remove_map_and_stop_waiter(mpp, vecs, 1);
817                         i--;
818                 }
819         }
820 }
821
822 static void
823 defered_failback_tick (vector mpvec)
824 {
825         struct multipath * mpp;
826         unsigned int i;
827
828         vector_foreach_slot (mpvec, mpp, i) {
829                 /*
830                  * defered failback getting sooner
831                  */
832                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
833                         mpp->failback_tick--;
834
835                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
836                                 switch_pathgroup(mpp);
837                 }
838         }
839 }
840
841 static void
842 retry_count_tick(vector mpvec)
843 {
844         struct multipath *mpp;
845         unsigned int i;
846
847         vector_foreach_slot (mpvec, mpp, i) {
848                 if (mpp->retry_tick) {
849                         mpp->stat_total_queueing_time++;
850                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
851                         if(--mpp->retry_tick == 0) {
852                                 dm_queue_if_no_path(mpp->alias, 0);
853                                 condlog(2, "%s: Disable queueing", mpp->alias);
854                         }
855                 }
856         }
857 }
858
/*
 * Run the checker of pp once its tick countdown has expired, and act on
 * the result:
 *  - on a state change, fail or reinstate the path in the device-mapper
 *    and schedule, cancel or perform a path group failback
 *  - on an unchanged up/ghost state, lengthen the check interval
 *  - in both non-failing cases, refresh the path priority and apply the
 *    failback policy of the map
 *
 * Paths that do not belong to a map are ignored.
 */
void
check_path (struct vectors * vecs, struct path * pp)
{
        int newstate;

        if (!pp->mpp)
                return;

        /*
         * a tick of 0 means the path is due now; otherwise count down
         * and only go on when the countdown reaches 0
         */
        if (pp->tick && --pp->tick)
                return; /* don't check this path yet */

        /*
         * provision a next check soonest,
         * in case we exit abnormally from here
         */
        pp->tick = conf->checkint;

        /*
         * no checker selected yet: refresh the sysfs info and try to
         * select one
         */
        if (!checker_selected(&pp->checker)) {
                pathinfo(pp, conf->hwtable, DI_SYSFS);
                select_checker(pp);
        }
        if (!checker_selected(&pp->checker)) {
                condlog(0, "%s: checker is not set", pp->dev);
                return;
        }
        /*
         * Set checker in async mode.
         * Honored only by checker implementing the said mode.
         */
        checker_set_async(&pp->checker);

        newstate = checker_check(&pp->checker);

        if (newstate < 0) {
                condlog(2, "%s: unusable path", pp->dev);
                pathinfo(pp, conf->hwtable, 0);
                return;
        }
        /*
         * Async IO in flight. Keep the previous path state
         * and reschedule as soon as possible
         */
        if (newstate == PATH_PENDING) {
                pp->tick = 1;
                return;
        }
        if (newstate != pp->state) {
                int oldstate = pp->state;
                pp->state = newstate;
                LOG_MSG(1, checker_message(&pp->checker));

                /*
                 * upon state change, reset the checkint
                 * to the shortest delay
                 */
                pp->checkint = conf->checkint;

                /*
                 * update_multipath_strings() is only called when the new
                 * state is neither down nor shaky; its failure is handled
                 * like a failed path
                 */
                if (newstate == PATH_DOWN || newstate == PATH_SHAKY ||
                    update_multipath_strings(pp->mpp, vecs->pathvec)) {
                        /*
                         * proactively fail path in the DM
                         * (del_active is set only if the path was
                         * previously up or ghost)
                         */
                        if (oldstate == PATH_UP ||
                            oldstate == PATH_GHOST)
                                fail_path(pp, 1);
                        else
                                fail_path(pp, 0);

                        /*
                         * cancel scheduled failback
                         */
                        pp->mpp->failback_tick = 0;

                        pp->mpp->stat_path_failures++;
                        return;
                }

                /*
                 * reinstate this path
                 * (add_active is set only if the path was previously
                 * neither up nor ghost)
                 */
                if (oldstate != PATH_UP &&
                    oldstate != PATH_GHOST)
                        reinstate_path(pp, 1);
                else
                        reinstate_path(pp, 0);

                /*
                 * schedule [deferred] failback
                 */
                if (pp->mpp->pgfailback > 0)
                        pp->mpp->failback_tick =
                                pp->mpp->pgfailback + 1;
                else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
                    need_switch_pathgroup(pp->mpp, 1))
                        switch_pathgroup(pp->mpp);

                /*
                 * if at least one path is up in a group, and
                 * the group is disabled, re-enable it
                 */
                if (newstate == PATH_UP)
                        enable_group(pp);
        }
        else if (newstate == PATH_UP || newstate == PATH_GHOST) {
                LOG_MSG(4, checker_message(&pp->checker));
                /*
                 * double the next check delay.
                 * max at conf->max_checkint
                 */
                if (pp->checkint < (conf->max_checkint / 2))
                        pp->checkint = 2 * pp->checkint;
                else
                        pp->checkint = conf->max_checkint;

                pp->tick = pp->checkint;
                condlog(4, "%s: delay next check %is",
                                pp->dev_t, pp->tick);
        }
        else if (newstate == PATH_DOWN)
                LOG_MSG(2, checker_message(&pp->checker));

        /*
         * redundant after a state change, where pp->state was already
         * updated above
         */
        pp->state = newstate;

        /*
         * path prio refreshing
         */
        condlog(4, "path prio refresh");
        pathinfo(pp, conf->hwtable, DI_PRIO);

        /*
         * pathgroup failback policy:
         * start a deferred failback countdown unless one is already
         * running, or switch at once for immediate failback
         */
        if (need_switch_pathgroup(pp->mpp, 0)) {
                if (pp->mpp->pgfailback > 0 &&
                    pp->mpp->failback_tick <= 0)
                        pp->mpp->failback_tick =
                                pp->mpp->pgfailback + 1;
                else if (pp->mpp->pgfailback ==
                                -FAILBACK_IMMEDIATE)
                        switch_pathgroup(pp->mpp);
        }
}
1001
1002 static void *
1003 checkerloop (void *ap)
1004 {
1005         struct vectors *vecs;
1006         struct path *pp;
1007         int count = 0;
1008         unsigned int i;
1009
1010         mlockall(MCL_CURRENT | MCL_FUTURE);
1011         vecs = (struct vectors *)ap;
1012         condlog(2, "path checkers start up");
1013
1014         /*
1015          * init the path check interval
1016          */
1017         vector_foreach_slot (vecs->pathvec, pp, i) {
1018                 pp->checkint = conf->checkint;
1019         }
1020
1021         while (1) {
1022                 pthread_cleanup_push(cleanup_lock, vecs->lock);
1023                 lock(vecs->lock);
1024                 condlog(4, "tick");
1025
1026                 vector_foreach_slot (vecs->pathvec, pp, i) {
1027                         check_path(vecs, pp);
1028                 }
1029                 defered_failback_tick(vecs->mpvec);
1030                 retry_count_tick(vecs->mpvec);
1031
1032                 if (count)
1033                         count--;
1034                 else {
1035                         condlog(4, "map garbage collection");
1036                         mpvec_garbage_collector(vecs);
1037                         count = MAPGCINT;
1038                 }
1039
1040                 lock_cleanup_pop(vecs->lock);
1041                 sleep(1);
1042         }
1043         return NULL;
1044 }
1045
1046 int
1047 configure (struct vectors * vecs, int start_waiters)
1048 {
1049         struct multipath * mpp;
1050         struct path * pp;
1051         vector mpvec;
1052         int i;
1053
1054         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1055                 return 1;
1056
1057         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1058                 return 1;
1059
1060         if (!(mpvec = vector_alloc()))
1061                 return 1;
1062
1063         /*
1064          * probe for current path (from sysfs) and map (from dm) sets
1065          */
1066         path_discovery(vecs->pathvec, conf, DI_ALL);
1067
1068         vector_foreach_slot (vecs->pathvec, pp, i){
1069                 if (filter_path(conf, pp) > 0){
1070                         vector_del_slot(vecs->pathvec, i);
1071                         free_path(pp);
1072                         i--;
1073                 }
1074                 else
1075                         pp->checkint = conf->checkint;
1076         }
1077         if (map_discovery(vecs))
1078                 return 1;
1079
1080         /*
1081          * create new set of maps & push changed ones into dm
1082          */
1083         if (coalesce_paths(vecs, mpvec, NULL))
1084                 return 1;
1085
1086         /*
1087          * may need to remove some maps which are no longer relevant
1088          * e.g., due to blacklist changes in conf file
1089          */
1090         if (coalesce_maps(vecs, mpvec))
1091                 return 1;
1092
1093         dm_lib_release();
1094
1095         sync_maps_state(mpvec);
1096
1097         /*
1098          * purge dm of old maps
1099          */
1100         remove_maps(vecs);
1101
1102         /*
1103          * save new set of maps formed by considering current path state
1104          */
1105         vector_free(vecs->mpvec);
1106         vecs->mpvec = mpvec;
1107
1108         /*
1109          * start dm event waiter threads for these new maps
1110          */
1111         vector_foreach_slot(vecs->mpvec, mpp, i) {
1112                 if (setup_multipath(vecs, mpp))
1113                         return 1;
1114                 if (start_waiters)
1115                         if (start_waiter_thread(mpp, vecs))
1116                                 return 1;
1117         }
1118         return 0;
1119 }
1120
1121 int
1122 reconfigure (struct vectors * vecs)
1123 {
1124         struct config * old = conf;
1125
1126         /*
1127          * free old map and path vectors ... they use old conf state
1128          */
1129         if (VECTOR_SIZE(vecs->mpvec))
1130                 remove_maps_and_stop_waiters(vecs);
1131
1132         if (VECTOR_SIZE(vecs->pathvec))
1133                 free_pathvec(vecs->pathvec, FREE_PATHS);
1134
1135         vecs->pathvec = NULL;
1136         conf = NULL;
1137
1138         if (load_config(DEFAULT_CONFIGFILE))
1139                 return 1;
1140
1141         conf->verbosity = old->verbosity;
1142
1143         if (!conf->checkint) {
1144                 conf->checkint = DEFAULT_CHECKINT;
1145                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1146         }
1147         configure(vecs, 1);
1148         free_config(old);
1149         return 0;
1150 }
1151
1152 static struct vectors *
1153 init_vecs (void)
1154 {
1155         struct vectors * vecs;
1156
1157         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1158
1159         if (!vecs)
1160                 return NULL;
1161
1162         vecs->lock =
1163                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1164
1165         if (!vecs->lock)
1166                 goto out;
1167
1168         pthread_mutex_init(vecs->lock, NULL);
1169
1170         return vecs;
1171
1172 out:
1173         FREE(vecs);
1174         condlog(0, "failed to init paths");
1175         return NULL;
1176 }
1177
/*
 * Install func as the handler for signo through sigaction(), with an
 * empty signal mask and no flags.
 *
 * Returns the handler that was installed before, or SIG_ERR when
 * sigaction() fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction wanted;
	struct sigaction previous;

	wanted.sa_handler = func;
	wanted.sa_flags = 0;
	sigemptyset(&wanted.sa_mask);

	if (sigaction(signo, &wanted, &previous) < 0)
		return (SIG_ERR);

	return (previous.sa_handler);
}
1196
1197 static void
1198 sighup (int sig)
1199 {
1200         condlog(2, "reconfigure (SIGHUP)");
1201
1202         lock(gvecs->lock);
1203         reconfigure(gvecs);
1204         unlock(gvecs->lock);
1205
1206 #ifdef _DEBUG_
1207         dbg_free_final(NULL);
1208 #endif
1209 }
1210
/*
 * Termination signal handler: hands over to exit_daemon() with status 0.
 * The signal number itself is not used.
 */
static void
sigend (int sig)
{
	(void)sig;

	exit_daemon(0);
}
1216
/*
 * SIGUSR1 handler: only records, at log level 3, that the signal
 * arrived. The signal number itself is not used.
 */
static void
sigusr1 (int sig)
{
	(void)sig;

	condlog(3, "SIGUSR1 received");
}
1222
1223 static void
1224 signal_init(void)
1225 {
1226         signal_set(SIGHUP, sighup);
1227         signal_set(SIGUSR1, sigusr1);
1228         signal_set(SIGINT, sigend);
1229         signal_set(SIGTERM, sigend);
1230         signal(SIGPIPE, SIG_IGN);
1231 }
1232
/*
 * Ask for the SCHED_RR scheduling policy at priority 99 for the calling
 * process. A refusal is only logged; the daemon carries on with whatever
 * policy it already has.
 */
static void
setscheduler (void)
{
	static struct sched_param rr_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler(0, SCHED_RR, &rr_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1247
/*
 * Write val, formatted as a decimal integer, to /proc/self/oom_adj.
 * Nothing happens when the file cannot be opened for writing; write
 * errors are not reported either.
 */
static void
set_oom_adj (int val)
{
	FILE *adj = fopen("/proc/self/oom_adj", "w");

	if (adj != NULL) {
		fprintf(adj, "%i", val);
		fclose(adj);
	}
}
1261
1262 static int
1263 child (void * param)
1264 {
1265         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1266         pthread_attr_t attr;
1267         struct vectors * vecs;
1268
1269         mlockall(MCL_CURRENT | MCL_FUTURE);
1270
1271         if (logsink)
1272                 log_thread_start();
1273
1274         condlog(2, "--------start up--------");
1275         condlog(2, "read " DEFAULT_CONFIGFILE);
1276
1277         if (load_config(DEFAULT_CONFIGFILE))
1278                 exit(1);
1279
1280         if (init_checkers()) {
1281                 condlog(0, "failed to initialize checkers");
1282                 exit(1);
1283         }
1284         if (init_prio()) {
1285                 condlog(0, "failed to initialize prioritizers");
1286                 exit(1);
1287         }
1288
1289         setlogmask(LOG_UPTO(conf->verbosity + 3));
1290
1291         /*
1292          * fill the voids left in the config file
1293          */
1294         if (!conf->checkint) {
1295                 conf->checkint = DEFAULT_CHECKINT;
1296                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1297         }
1298
1299         if (conf->max_fds) {
1300                 struct rlimit fd_limit;
1301                 if (conf->max_fds > 0) {
1302                         fd_limit.rlim_cur = conf->max_fds;
1303                         fd_limit.rlim_max = conf->max_fds;
1304                 }
1305                 else {
1306                         fd_limit.rlim_cur = RLIM_INFINITY;
1307                         fd_limit.rlim_max = RLIM_INFINITY;
1308                 }
1309                 if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0)
1310                         condlog(0, "can't set open fds limit to %d : %s\n",
1311                                 conf->max_fds, strerror(errno));
1312         }
1313
1314         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1315                 if (logsink)
1316                         log_thread_stop();
1317
1318                 exit(1);
1319         }
1320         signal_init();
1321         setscheduler();
1322         set_oom_adj(-16);
1323         vecs = gvecs = init_vecs();
1324
1325         if (!vecs)
1326                 exit(1);
1327
1328         if (sysfs_init(conf->sysfs_dir, FILE_NAME_SIZE)) {
1329                 condlog(0, "can not find sysfs mount point");
1330                 exit(1);
1331         }
1332
1333         /*
1334          * fetch and configure both paths and multipaths
1335          */
1336         if (configure(vecs, 1)) {
1337                 condlog(0, "failure during configuration");
1338                 exit(1);
1339         }
1340
1341         /*
1342          * start threads
1343          */
1344         pthread_attr_init(&attr);
1345         pthread_attr_setstacksize(&attr, 64 * 1024);
1346         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
1347
1348         pthread_create(&check_thr, &attr, checkerloop, vecs);
1349         pthread_create(&uevent_thr, &attr, ueventloop, vecs);
1350         pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, vecs);
1351
1352         pthread_cond_wait(&exit_cond, &exit_mutex);
1353
1354         /*
1355          * exit path
1356          */
1357         lock(vecs->lock);
1358         remove_maps_and_stop_waiters(vecs);
1359         free_pathvec(vecs->pathvec, FREE_PATHS);
1360
1361         pthread_cancel(check_thr);
1362         pthread_cancel(uevent_thr);
1363         pthread_cancel(uxlsnr_thr);
1364
1365         sysfs_cleanup();
1366
1367         free_keys(keys);
1368         keys = NULL;
1369         free_handlers(handlers);
1370         handlers = NULL;
1371         free_polls();
1372
1373         unlock(vecs->lock);
1374         pthread_mutex_destroy(vecs->lock);
1375         FREE(vecs->lock);
1376         vecs->lock = NULL;
1377         FREE(vecs);
1378         vecs = NULL;
1379
1380         condlog(2, "--------shut down-------");
1381
1382         if (logsink)
1383                 log_thread_stop();
1384
1385         dm_lib_release();
1386         dm_lib_exit();
1387
1388         /*
1389          * Freeing config must be done after condlog() and dm_lib_exit(),
1390          * because logging functions like dlog() and dm_write_log()
1391          * reference the config.
1392          */
1393         free_config(conf);
1394         conf = NULL;
1395
1396 #ifdef _DEBUG_
1397         dbg_free_final(NULL);
1398 #endif
1399
1400         exit(0);
1401 }
1402
1403 static int
1404 daemonize(void)
1405 {
1406         int pid;
1407         int in_fd, out_fd;
1408
1409         if( (pid = fork()) < 0){
1410                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
1411                 return -1;
1412         }
1413         else if (pid != 0)
1414                 return pid;
1415
1416         setsid();
1417
1418         if ( (pid = fork()) < 0)
1419                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
1420         else if (pid != 0)
1421                 _exit(0);
1422
1423         in_fd = open("/dev/null", O_RDONLY);
1424         if (in_fd < 0){
1425                 fprintf(stderr, "cannot open /dev/null for input : %s\n",
1426                         strerror(errno));
1427                 _exit(0);
1428         }
1429         out_fd = open("/dev/console", O_WRONLY);
1430         if (out_fd < 0){
1431                 fprintf(stderr, "cannot open /dev/console for output : %s\n",
1432                         strerror(errno));
1433                 _exit(0);
1434         }
1435
1436         close(STDIN_FILENO);
1437         dup(in_fd);
1438         close(STDOUT_FILENO);
1439         dup(out_fd);
1440         close(STDERR_FILENO);
1441         dup(out_fd);
1442
1443         close(in_fd);
1444         close(out_fd);
1445         chdir("/");
1446         umask(0);
1447         return 0;
1448 }
1449
1450 int
1451 main (int argc, char *argv[])
1452 {
1453         extern char *optarg;
1454         extern int optind;
1455         int arg;
1456         int err;
1457
1458         logsink = 1;
1459         dm_init();
1460
1461         if (getuid() != 0) {
1462                 fprintf(stderr, "need to be root\n");
1463                 exit(1);
1464         }
1465
1466         /* make sure we don't lock any path */
1467         chdir("/");
1468         umask(umask(077) | 022);
1469
1470         conf = alloc_config();
1471
1472         if (!conf)
1473                 exit(1);
1474
1475         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1476         switch(arg) {
1477                 case 'd':
1478                         logsink = 0;
1479                         //debug=1; /* ### comment me out ### */
1480                         break;
1481                 case 'v':
1482                         if (sizeof(optarg) > sizeof(char *) ||
1483                             !isdigit(optarg[0]))
1484                                 exit(1);
1485
1486                         conf->verbosity = atoi(optarg);
1487                         break;
1488                 case 'k':
1489                         uxclnt(optarg);
1490                         exit(0);
1491                 default:
1492                         ;
1493                 }
1494         }
1495
1496         if (!logsink)
1497                 err = 0;
1498         else
1499                 err = daemonize();
1500
1501         if (err < 0)
1502                 /* error */
1503                 exit(1);
1504         else if (err > 0)
1505                 /* parent dies */
1506                 exit(0);
1507         else
1508                 /* child lives */
1509                 return (child(NULL));
1510 }
1511