Merge branch 'master' of git://git.kernel.org/pub/scm/linux/storage/multipath-tools/
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18
19 /*
20  * libcheckers
21  */
22 #include <checkers.h>
23
24 /*
25  * libmultipath
26  */
27 #include <parser.h>
28 #include <vector.h>
29 #include <memory.h>
30 #include <config.h>
31 #include <util.h>
32 #include <hwtable.h>
33 #include <defaults.h>
34 #include <structs.h>
35 #include <callout.h>
36 #include <blacklist.h>
37 #include <structs_vec.h>
38 #include <dmparser.h>
39 #include <devmapper.h>
40 #include <sysfs.h>
41 #include <dict.h>
42 #include <discovery.h>
43 #include <debug.h>
44 #include <propsel.h>
45 #include <uevent.h>
46 #include <switchgroup.h>
47 #include <print.h>
48 #include <configure.h>
49 #include <prio.h>
50
51 #include "main.h"
52 #include "pidfile.h"
53 #include "uxlsnr.h"
54 #include "uxclnt.h"
55 #include "cli.h"
56 #include "cli_handlers.h"
57 #include "lock.h"
58 #include "waiter.h"
59
60 #define FILE_NAME_SIZE 256
61 #define CMDSIZE 160
62
/*
 * Log the checker message b at verbosity a, prefixed with the path device
 * name. Empty messages are not logged.
 *
 * Relies on a `struct path * pp` being in scope at the expansion site.
 * The argument b is evaluated twice.
 *
 * Wrapped in do/while(0) so that the expansion is exactly one statement
 * and cannot capture an `else` that follows the call site.
 */
#define LOG_MSG(a,b) \
        do { \
                if (strlen(b)) \
                        condlog(a, "%s: %s", pp->dev, b); \
        } while (0)
65
/*
 * Condition variable / mutex pair: exit_daemon() signals exit_cond while
 * holding exit_mutex.
 */
pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;

/* NOTE(review): presumably the log destination selector used by condlog() -- confirm */
int logsink;

/*
 * global copy of vecs for use in sig handlers
 */
struct vectors * gvecs;
75
76 static int
77 need_switch_pathgroup (struct multipath * mpp, int refresh)
78 {
79         struct pathgroup * pgp;
80         struct path * pp;
81         unsigned int i, j;
82
83         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
84                 return 0;
85
86         /*
87          * Refresh path priority values
88          */
89         if (refresh)
90                 vector_foreach_slot (mpp->pg, pgp, i)
91                         vector_foreach_slot (pgp->paths, pp, j)
92                                 pathinfo(pp, conf->hwtable, DI_PRIO);
93
94         mpp->bestpg = select_path_group(mpp);
95
96         if (mpp->bestpg != mpp->nextpg)
97                 return 1;
98
99         return 0;
100 }
101
102 static void
103 switch_pathgroup (struct multipath * mpp)
104 {
105         mpp->stat_switchgroup++;
106         dm_switchgroup(mpp->alias, mpp->bestpg);
107         condlog(2, "%s: switch to path group #%i",
108                  mpp->alias, mpp->bestpg);
109 }
110
111 static int
112 coalesce_maps(struct vectors *vecs, vector nmpv)
113 {
114         struct multipath * ompp;
115         vector ompv = vecs->mpvec;
116         unsigned int i;
117         int j;
118
119         vector_foreach_slot (ompv, ompp, i) {
120                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
121                         /*
122                          * remove all current maps not allowed by the
123                          * current configuration
124                          */
125                         if (dm_flush_map(ompp->alias)) {
126                                 condlog(0, "%s: unable to flush devmap",
127                                         ompp->alias);
128                                 /*
129                                  * may be just because the device is open
130                                  */
131                                 if (!vector_alloc_slot(nmpv))
132                                         return 1;
133
134                                 vector_set_slot(nmpv, ompp);
135                                 setup_multipath(vecs, ompp);
136
137                                 if ((j = find_slot(ompv, (void *)ompp)) != -1)
138                                         vector_del_slot(ompv, j);
139
140                                 continue;
141                         }
142                         else {
143                                 dm_lib_release();
144                                 condlog(2, "%s devmap removed", ompp->alias);
145                         }
146                 }
147         }
148         return 0;
149 }
150
151 void
152 sync_map_state(struct multipath *mpp)
153 {
154         struct pathgroup *pgp;
155         struct path *pp;
156         unsigned int i, j;
157
158         if (!mpp->pg)
159                 return;
160
161         vector_foreach_slot (mpp->pg, pgp, i){
162                 vector_foreach_slot (pgp->paths, pp, j){
163                         if (pp->state == PATH_UNCHECKED || 
164                             pp->state == PATH_WILD)
165                                 continue;
166                         if ((pp->dmstate == PSTATE_FAILED ||
167                              pp->dmstate == PSTATE_UNDEF) &&
168                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
169                                 dm_reinstate_path(mpp->alias, pp->dev_t);
170                         else if ((pp->dmstate == PSTATE_ACTIVE ||
171                                   pp->dmstate == PSTATE_UNDEF) &&
172                                  (pp->state == PATH_DOWN ||
173                                   pp->state == PATH_SHAKY))
174                                 dm_fail_path(mpp->alias, pp->dev_t);
175                 }
176         }
177 }
178
179 static void
180 sync_maps_state(vector mpvec)
181 {
182         unsigned int i;
183         struct multipath *mpp;
184
185         vector_foreach_slot (mpvec, mpp, i)
186                 sync_map_state(mpp);
187 }
188
189 static int
190 flush_map(struct multipath * mpp, struct vectors * vecs)
191 {
192         /*
193          * clear references to this map before flushing so we can ignore
194          * the spurious uevent we may generate with the dm_flush_map call below
195          */
196         if (dm_flush_map(mpp->alias)) {
197                 /*
198                  * May not really be an error -- if the map was already flushed
199                  * from the device mapper by dmsetup(8) for instance.
200                  */
201                 condlog(0, "%s: can't flush", mpp->alias);
202                 return 1;
203         }
204         else {
205                 dm_lib_release();
206                 condlog(2, "%s: devmap removed", mpp->alias);
207         }
208
209         orphan_paths(vecs->pathvec, mpp);
210         remove_map_and_stop_waiter(mpp, vecs, 1);
211
212         return 0;
213 }
214
215 static int
216 uev_add_map (struct sysfs_device * dev, struct vectors * vecs)
217 {
218         condlog(2, "%s: add map (uevent)", dev->kernel);
219         return ev_add_map(dev, vecs);
220 }
221
222 int
223 ev_add_map (struct sysfs_device * dev, struct vectors * vecs)
224 {
225         char * alias;
226         char *dev_t;
227         int major, minor;
228         char * refwwid;
229         struct multipath * mpp;
230         int map_present;
231         int r = 1;
232
233         dev_t = sysfs_attr_get_value(dev->devpath, "dev");
234
235         if (!dev_t || sscanf(dev_t, "%d:%d", &major, &minor) != 2)
236                 return 1;
237
238         alias = dm_mapname(major, minor);
239
240         if (!alias)
241                 return 1;
242
243         map_present = dm_map_present(alias);
244
245         if (map_present && dm_type(alias, TGT_MPATH) <= 0) {
246                 condlog(4, "%s: not a multipath map", alias);
247                 FREE(alias);
248                 return 0;
249         }
250
251         mpp = find_mp_by_alias(vecs->mpvec, alias);
252
253         if (mpp) {
254                 /*
255                  * Not really an error -- we generate our own uevent
256                  * if we create a multipath mapped device as a result
257                  * of uev_add_path
258                  */
259                 condlog(0, "%s: devmap already registered",
260                         dev->kernel);
261                 FREE(alias);
262                 return 0;
263         }
264
265         /*
266          * now we can register the map
267          */
268         if (map_present && (mpp = add_map_without_path(vecs, minor, alias))) {
269                 sync_map_state(mpp);
270                 condlog(2, "%s: devmap %s added", alias, dev->kernel);
271                 return 0;
272         }
273         refwwid = get_refwwid(dev->kernel, DEV_DEVMAP, vecs->pathvec);
274
275         if (refwwid) {
276                 r = coalesce_paths(vecs, NULL, refwwid, 0);
277                 dm_lib_release();
278         }
279
280         if (!r)
281                 condlog(2, "%s: devmap %s added", alias, dev->kernel);
282         else
283                 condlog(0, "%s: uev_add_map %s failed", alias, dev->kernel);
284
285         FREE(refwwid);
286         FREE(alias);
287         return r;
288 }
289
290 static int
291 uev_remove_map (struct sysfs_device * dev, struct vectors * vecs)
292 {
293         condlog(2, "%s: remove map (uevent)", dev->kernel);
294         return ev_remove_map(dev->kernel, vecs);
295 }
296
297 int
298 ev_remove_map (char * devname, struct vectors * vecs)
299 {
300         struct multipath * mpp;
301
302         mpp = find_mp_by_str(vecs->mpvec, devname);
303
304         if (!mpp) {
305                 condlog(2, "%s: devmap not registered, can't remove",
306                         devname);
307                 return 0;
308         }
309         flush_map(mpp, vecs);
310
311         return 0;
312 }
313
314 static int
315 uev_umount_map (struct sysfs_device * dev, struct vectors * vecs)
316 {
317         struct multipath * mpp;
318
319         condlog(2, "%s: umount map (uevent)", dev->kernel);
320
321         mpp = find_mp_by_str(vecs->mpvec, dev->kernel);
322
323         if (!mpp)
324                 return 0;
325
326         update_mpp_paths(mpp, vecs->pathvec);
327         verify_paths(mpp, vecs, NULL);
328
329         if (!VECTOR_SIZE(mpp->paths))
330                 flush_map(mpp, vecs);
331
332         return 0;
333 }
334
335 static int
336 uev_add_path (struct sysfs_device * dev, struct vectors * vecs)
337 {
338         condlog(2, "%s: add path (uevent)", dev->kernel);
339         return (ev_add_path(dev->kernel, vecs) != 1)? 0 : 1;
340 }
341
342
343 /*
344  * returns:
345  * 0: added
346  * 1: error
347  * 2: blacklisted
348  */
349 int
350 ev_add_path (char * devname, struct vectors * vecs)
351 {
352         struct multipath * mpp;
353         struct path * pp;
354         char empty_buff[WWID_SIZE] = {0};
355
356         pp = find_path_by_dev(vecs->pathvec, devname);
357
358         if (pp) {
359                 condlog(0, "%s: spurious uevent, path already in pathvec",
360                         devname);
361                 if (pp->mpp)
362                         return 0;
363         }
364         else {
365                 /*
366                  * get path vital state
367                  */
368                 if (!(pp = store_pathinfo(vecs->pathvec, conf->hwtable,
369                       devname, DI_ALL))) {
370                         condlog(0, "%s: failed to store path info", devname);
371                         return 1;
372                 }
373                 pp->checkint = conf->checkint;
374         }
375
376         /*
377          * need path UID to go any further
378          */
379         if (memcmp(empty_buff, pp->wwid, WWID_SIZE) == 0) {
380                 condlog(0, "%s: failed to get path uid", devname);
381                 return 1; /* leave path added to pathvec */
382         }
383         if (filter_path(conf, pp) > 0){
384                 int i = find_slot(vecs->pathvec, (void *)pp);
385                 if (i != -1)
386                         vector_del_slot(vecs->pathvec, i);
387                 free_path(pp);
388                 return 2;
389         }
390         mpp = pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
391 rescan:
392         if (mpp) {
393                 condlog(4,"%s: adopting all paths for path %s",
394                         mpp->alias, pp->dev);
395                 if (adopt_paths(vecs->pathvec, mpp))
396                         return 1; /* leave path added to pathvec */
397
398                 verify_paths(mpp, vecs, NULL);
399                 mpp->flush_on_last_del = FLUSH_UNDEF;
400                 mpp->action = ACT_RELOAD;
401         }
402         else {
403                 condlog(4,"%s: creating new map", pp->dev);
404                 if ((mpp = add_map_with_path(vecs, pp, 1)))
405                         mpp->action = ACT_CREATE;
406                 else
407                         return 1; /* leave path added to pathvec */
408         }
409
410         /*
411          * push the map to the device-mapper
412          */
413         if (setup_map(mpp)) {
414                 condlog(0, "%s: failed to setup map for addition of new "
415                         "path %s", mpp->alias, devname);
416                 goto out;
417         }
418         /*
419          * reload the map for the multipath mapped device
420          */
421         if (domap(mpp) <= 0) {
422                 condlog(0, "%s: failed in domap for addition of new "
423                         "path %s", mpp->alias, devname);
424                 /*
425                  * deal with asynchronous uevents :((
426                  */
427                 if (mpp->action == ACT_RELOAD) {
428                         condlog(0, "%s: uev_add_path sleep", mpp->alias);
429                         sleep(1);
430                         update_mpp_paths(mpp, vecs->pathvec);
431                         goto rescan;
432                 }
433                 else
434                         goto out;
435         }
436         dm_lib_release();
437
438         /*
439          * update our state from kernel regardless of create or reload
440          */
441         if (setup_multipath(vecs, mpp))
442                 goto out;
443
444         sync_map_state(mpp);
445
446         if (mpp->action == ACT_CREATE &&
447             start_waiter_thread(mpp, vecs))
448                         goto out;
449
450         condlog(2, "%s path added to devmap %s", devname, mpp->alias);
451         return 0;
452
453 out:
454         remove_map(mpp, vecs, 1);
455         return 1;
456 }
457
458 static int
459 uev_remove_path (struct sysfs_device * dev, struct vectors * vecs)
460 {
461         int retval;
462
463         condlog(2, "%s: remove path (uevent)", dev->kernel);
464         retval = ev_remove_path(dev->kernel, vecs);
465         if (!retval)
466                 sysfs_device_put(dev);
467
468         return retval;
469 }
470
471 int
472 ev_remove_path (char * devname, struct vectors * vecs)
473 {
474         struct multipath * mpp;
475         struct path * pp;
476         int i, retval = 0;
477
478         pp = find_path_by_dev(vecs->pathvec, devname);
479
480         if (!pp) {
481                 /* Not an error; path might have been purged earlier */
482                 condlog(0, "%s: path already removed", devname);
483                 return 0;
484         }
485
486         /*
487          * avoid referring to the map of an orphaned path
488          */
489         if ((mpp = pp->mpp)) {
490                 /*
491                  * transform the mp->pg vector of vectors of paths
492                  * into a mp->params string to feed the device-mapper
493                  */
494                 if (update_mpp_paths(mpp, vecs->pathvec)) {
495                         condlog(0, "%s: failed to update paths",
496                                 mpp->alias);
497                         goto fail;
498                 }
499                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
500                         vector_del_slot(mpp->paths, i);
501
502                 /*
503                  * remove the map IFF removing the last path
504                  */
505                 if (VECTOR_SIZE(mpp->paths) == 0) {
506                         char alias[WWID_SIZE];
507
508                         /*
509                          * flush_map will fail if the device is open
510                          */
511                         strncpy(alias, mpp->alias, WWID_SIZE);
512                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
513                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
514                                 mpp->retry_tick = 0;
515                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
516                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
517                                 dm_queue_if_no_path(mpp->alias, 0);
518                         }
519                         if (!flush_map(mpp, vecs)) {
520                                 condlog(2, "%s: removed map after"
521                                         " removing all paths",
522                                         alias);
523                                 retval = 0;
524                                 goto out;
525                         }
526                         /*
527                          * Not an error, continue
528                          */
529                 }
530
531                 if (setup_map(mpp)) {
532                         condlog(0, "%s: failed to setup map for"
533                                 " removal of path %s", mpp->alias,
534                                 devname);
535                         goto fail;
536                 }
537                 /*
538                  * reload the map
539                  */
540                 mpp->action = ACT_RELOAD;
541                 if (domap(mpp) <= 0) {
542                         condlog(0, "%s: failed in domap for "
543                                 "removal of path %s",
544                                 mpp->alias, devname);
545                         retval = 1;
546                 } else {
547                         /*
548                          * update our state from kernel
549                          */
550                         if (setup_multipath(vecs, mpp)) {
551                                 goto fail;
552                         }
553                         sync_map_state(mpp);
554
555                         condlog(2, "%s: path removed from map %s",
556                                 devname, mpp->alias);
557                 }
558         }
559
560 out:
561         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
562                 vector_del_slot(vecs->pathvec, i);
563
564         free_path(pp);
565
566         return retval;
567
568 fail:
569         remove_map_and_stop_waiter(mpp, vecs, 1);
570         return 1;
571 }
572
573 static int
574 map_discovery (struct vectors * vecs)
575 {
576         struct multipath * mpp;
577         unsigned int i;
578
579         if (dm_get_maps(vecs->mpvec))
580                 return 1;
581
582         vector_foreach_slot (vecs->mpvec, mpp, i)
583                 if (setup_multipath(vecs, mpp))
584                         return 1;
585
586         return 0;
587 }
588
589 int
590 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
591 {
592         struct vectors * vecs;
593         int r;
594
595         *reply = NULL;
596         *len = 0;
597         vecs = (struct vectors *)trigger_data;
598
599         pthread_cleanup_push(cleanup_lock, &vecs->lock);
600         lock(vecs->lock);
601
602         r = parse_cmd(str, reply, len, vecs);
603
604         if (r > 0) {
605                 *reply = STRDUP("fail\n");
606                 *len = strlen(*reply) + 1;
607                 r = 1;
608         }
609         else if (!r && *len == 0) {
610                 *reply = STRDUP("ok\n");
611                 *len = strlen(*reply) + 1;
612                 r = 0;
613         }
614         /* else if (r < 0) leave *reply alone */
615
616         lock_cleanup_pop(vecs->lock);
617         return r;
618 }
619
/*
 * Tell whether a uevent devpath should be ignored.
 *
 * Only whole block devices are kept: a devpath without "/block/", without
 * a name after it, or with a further component after the device name
 * (a partition) is discarded.
 *
 * Returns 1 to discard the event, 0 to keep it.
 */
static int
uev_discard(char * devpath)
{
        char disk[11], part[11];
        char *blk = strstr(devpath, "/block/");

        if (!blk) {
                condlog(4, "no /block/ in '%s'", devpath);
                return 1;
        }

        /* keep: a device name is present and nothing follows it */
        if (sscanf(blk, "/block/%10s", disk) == 1 &&
            sscanf(blk, "/block/%10[^/]/%10s", disk, part) != 2)
                return 0;

        condlog(4, "discard event on %s", devpath);
        return 1;
}
641
642 int
643 uev_trigger (struct uevent * uev, void * trigger_data)
644 {
645         int r = 0;
646         struct sysfs_device *sysdev;
647         struct vectors * vecs;
648
649         vecs = (struct vectors *)trigger_data;
650
651         if (uev_discard(uev->devpath))
652                 return 0;
653
654         sysdev = sysfs_device_get(uev->devpath);
655         if(!sysdev)
656                 return 0;
657
658         lock(vecs->lock);
659
660         /*
661          * device map event
662          * Add events are ignored here as the tables
663          * are not fully initialised then.
664          */
665         if (!strncmp(sysdev->kernel, "dm-", 3)) {
666                 if (!strncmp(uev->action, "change", 6)) {
667                         r = uev_add_map(sysdev, vecs);
668                         goto out;
669                 }
670                 if (!strncmp(uev->action, "remove", 6)) {
671                         r = uev_remove_map(sysdev, vecs);
672                         goto out;
673                 }
674                 if (!strncmp(uev->action, "umount", 6)) {
675                         r = uev_umount_map(sysdev, vecs);
676                         goto out;
677                 }
678                 goto out;
679         }
680
681         /*
682          * path add/remove event
683          */
684         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
685                            sysdev->kernel) > 0)
686                 goto out;
687
688         if (!strncmp(uev->action, "add", 3)) {
689                 r = uev_add_path(sysdev, vecs);
690                 goto out;
691         }
692         if (!strncmp(uev->action, "remove", 6)) {
693                 r = uev_remove_path(sysdev, vecs);
694                 goto out;
695         }
696
697 out:
698         unlock(vecs->lock);
699         return r;
700 }
701
702 static void *
703 ueventloop (void * ap)
704 {
705         block_signal(SIGUSR1, NULL);
706         block_signal(SIGHUP, NULL);
707
708         if (uevent_listen(&uev_trigger, ap))
709                 fprintf(stderr, "error starting uevent listener");
710
711         return NULL;
712 }
713
/*
 * Thread body of the unix socket (CLI) listener.
 *
 * Blocks SIGUSR1 and SIGHUP, initialises the CLI, registers one handler
 * per supported command and then hands control to uxsock_listen() with
 * uxsock_trigger() as callback and ap as its data.
 *
 * Always returns NULL; returns early, before registering anything, when
 * cli_init() fails.
 */
static void *
uxlsnrloop (void * ap)
{
        block_signal(SIGUSR1, NULL);
        block_signal(SIGHUP, NULL);

        if (cli_init())
                return NULL;

        /* listing commands */
        set_handler_callback(LIST+PATHS, cli_list_paths);
        set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
        set_handler_callback(LIST+MAPS, cli_list_maps);
        set_handler_callback(LIST+STATUS, cli_list_status);
        set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
        set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
        set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
        /* both spellings below share the same handler */
        set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
        set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
        set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
        set_handler_callback(LIST+CONFIG, cli_list_config);
        set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
        set_handler_callback(LIST+DEVICES, cli_list_devices);
        set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
        /* path and map management commands */
        set_handler_callback(ADD+PATH, cli_add_path);
        set_handler_callback(DEL+PATH, cli_del_path);
        set_handler_callback(ADD+MAP, cli_add_map);
        set_handler_callback(DEL+MAP, cli_del_map);
        set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
        set_handler_callback(RECONFIGURE, cli_reconfigure);
        set_handler_callback(SUSPEND+MAP, cli_suspend);
        set_handler_callback(RESUME+MAP, cli_resume);
        set_handler_callback(RESIZE+MAP, cli_resize);
        set_handler_callback(REINSTATE+PATH, cli_reinstate);
        set_handler_callback(FAIL+PATH, cli_fail);
        /* queueing control, per map and for all maps */
        set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
        set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
        set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
        set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
        set_handler_callback(QUIT, cli_quit);

        uxsock_listen(&uxsock_trigger, ap);

        return NULL;
}
758
759 static int
760 exit_daemon (int status)
761 {
762         if (status != 0)
763                 fprintf(stderr, "bad exit status. see daemon.log\n");
764
765         condlog(3, "unlink pidfile");
766         unlink(DEFAULT_PIDFILE);
767
768         pthread_mutex_lock(&exit_mutex);
769         pthread_cond_signal(&exit_cond);
770         pthread_mutex_unlock(&exit_mutex);
771
772         return status;
773 }
774
775 static void
776 fail_path (struct path * pp, int del_active)
777 {
778         if (!pp->mpp)
779                 return;
780
781         condlog(2, "checker failed path %s in map %s",
782                  pp->dev_t, pp->mpp->alias);
783
784         dm_fail_path(pp->mpp->alias, pp->dev_t);
785         if (del_active)
786                 update_queue_mode_del_path(pp->mpp);
787 }
788
789 /*
790  * caller must have locked the path list before calling that function
791  */
792 static void
793 reinstate_path (struct path * pp, int add_active)
794 {
795         if (!pp->mpp)
796                 return;
797
798         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
799                 condlog(0, "%s: reinstate failed", pp->dev_t);
800         else {
801                 condlog(2, "%s: reinstated", pp->dev_t);
802                 if (add_active)
803                         update_queue_mode_add_path(pp->mpp);
804         }
805 }
806
807 static void
808 enable_group(struct path * pp)
809 {
810         struct pathgroup * pgp;
811
812         /*
813          * if path is added through uev_add_path, pgindex can be unset.
814          * next update_strings() will set it, upon map reload event.
815          *
816          * we can safely return here, because upon map reload, all
817          * PG will be enabled.
818          */
819         if (!pp->mpp->pg || !pp->pgindex)
820                 return;
821
822         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
823
824         if (pgp->status == PGSTATE_DISABLED) {
825                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
826                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
827         }
828 }
829
830 static void
831 mpvec_garbage_collector (struct vectors * vecs)
832 {
833         struct multipath * mpp;
834         unsigned int i;
835
836         if (!vecs->mpvec)
837                 return;
838
839         vector_foreach_slot (vecs->mpvec, mpp, i) {
840                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
841                         condlog(2, "%s: remove dead map", mpp->alias);
842                         remove_map_and_stop_waiter(mpp, vecs, 1);
843                         i--;
844                 }
845         }
846 }
847
848 static void
849 defered_failback_tick (vector mpvec)
850 {
851         struct multipath * mpp;
852         unsigned int i;
853
854         vector_foreach_slot (mpvec, mpp, i) {
855                 /*
856                  * defered failback getting sooner
857                  */
858                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
859                         mpp->failback_tick--;
860
861                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
862                                 switch_pathgroup(mpp);
863                 }
864         }
865 }
866
867 static void
868 retry_count_tick(vector mpvec)
869 {
870         struct multipath *mpp;
871         unsigned int i;
872
873         vector_foreach_slot (mpvec, mpp, i) {
874                 if (mpp->retry_tick) {
875                         mpp->stat_total_queueing_time++;
876                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
877                         if(--mpp->retry_tick == 0) {
878                                 dm_queue_if_no_path(mpp->alias, 0);
879                                 condlog(2, "%s: Disable queueing", mpp->alias);
880                         }
881                 }
882         }
883 }
884
885 void
886 check_path (struct vectors * vecs, struct path * pp)
887 {
888         int newstate;
889
890         if (!pp->mpp)
891                 return;
892
893         if (pp->tick && --pp->tick)
894                 return; /* don't check this path yet */
895
896         /*
897          * provision a next check soonest,
898          * in case we exit abnormaly from here
899          */
900         pp->tick = conf->checkint;
901
902         if (!checker_selected(&pp->checker)) {
903                 pathinfo(pp, conf->hwtable, DI_SYSFS);
904                 select_checker(pp);
905         }
906         if (!checker_selected(&pp->checker)) {
907                 condlog(0, "%s: checker is not set", pp->dev);
908                 return;
909         }
910         /*
911          * Set checker in async mode.
912          * Honored only by checker implementing the said mode.
913          */
914         checker_set_async(&pp->checker);
915
916         if (path_offline(pp))
917                 newstate = PATH_DOWN;
918         else
919                 newstate = checker_check(&pp->checker);
920
921         if (newstate < 0) {
922                 condlog(2, "%s: unusable path", pp->dev);
923                 pathinfo(pp, conf->hwtable, 0);
924                 return;
925         }
926         /*
927          * Async IO in flight. Keep the previous path state
928          * and reschedule as soon as possible
929          */
930         if (newstate == PATH_PENDING) {
931                 pp->tick = 1;
932                 return;
933         }
934         if (newstate != pp->state) {
935                 int oldstate = pp->state;
936                 pp->state = newstate;
937                 LOG_MSG(1, checker_message(&pp->checker));
938
939                 /*
940                  * upon state change, reset the checkint
941                  * to the shortest delay
942                  */
943                 pp->checkint = conf->checkint;
944
945                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY ||
946                     update_multipath_strings(pp->mpp, vecs->pathvec)) {
947                         /*
948                          * proactively fail path in the DM
949                          */
950                         if (oldstate == PATH_UP ||
951                             oldstate == PATH_GHOST)
952                                 fail_path(pp, 1);
953                         else
954                                 fail_path(pp, 0);
955
956                         /*
957                          * cancel scheduled failback
958                          */
959                         pp->mpp->failback_tick = 0;
960
961                         pp->mpp->stat_path_failures++;
962                         return;
963                 }
964
965                 /*
966                  * reinstate this path
967                  */
968                 if (oldstate != PATH_UP &&
969                     oldstate != PATH_GHOST)
970                         reinstate_path(pp, 1);
971                 else
972                         reinstate_path(pp, 0);
973
974                 /*
975                  * schedule [defered] failback
976                  */
977                 if (pp->mpp->pgfailback > 0)
978                         pp->mpp->failback_tick =
979                                 pp->mpp->pgfailback + 1;
980                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
981                     need_switch_pathgroup(pp->mpp, 1))
982                         switch_pathgroup(pp->mpp);
983
984                 /*
985                  * if at least one path is up in a group, and
986                  * the group is disabled, re-enable it
987                  */
988                 if (newstate == PATH_UP)
989                         enable_group(pp);
990         }
991         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
992                 LOG_MSG(4, checker_message(&pp->checker));
993                 /*
994                  * double the next check delay.
995                  * max at conf->max_checkint
996                  */
997                 if (pp->checkint < (conf->max_checkint / 2))
998                         pp->checkint = 2 * pp->checkint;
999                 else
1000                         pp->checkint = conf->max_checkint;
1001
1002                 pp->tick = pp->checkint;
1003                 condlog(4, "%s: delay next check %is",
1004                                 pp->dev_t, pp->tick);
1005         }
1006         else if (newstate == PATH_DOWN)
1007                 LOG_MSG(2, checker_message(&pp->checker));
1008
1009         pp->state = newstate;
1010
1011         /*
1012          * path prio refreshing
1013          */
1014         condlog(4, "path prio refresh");
1015         pathinfo(pp, conf->hwtable, DI_PRIO);
1016
1017         /*
1018          * pathgroup failback policy
1019          */
1020         if (need_switch_pathgroup(pp->mpp, 0)) {
1021                 if (pp->mpp->pgfailback > 0 &&
1022                     pp->mpp->failback_tick <= 0)
1023                         pp->mpp->failback_tick =
1024                                 pp->mpp->pgfailback + 1;
1025                 else if (pp->mpp->pgfailback ==
1026                                 -FAILBACK_IMMEDIATE)
1027                         switch_pathgroup(pp->mpp);
1028         }
1029 }
1030
1031 static void *
1032 checkerloop (void *ap)
1033 {
1034         struct vectors *vecs;
1035         struct path *pp;
1036         int count = 0;
1037         unsigned int i;
1038         sigset_t old;
1039
1040         mlockall(MCL_CURRENT | MCL_FUTURE);
1041         vecs = (struct vectors *)ap;
1042         condlog(2, "path checkers start up");
1043
1044         /*
1045          * init the path check interval
1046          */
1047         vector_foreach_slot (vecs->pathvec, pp, i) {
1048                 pp->checkint = conf->checkint;
1049         }
1050
1051         while (1) {
1052                 block_signal(SIGHUP, &old);
1053                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1054                 lock(vecs->lock);
1055                 condlog(4, "tick");
1056
1057                 if (vecs->pathvec) {
1058                         vector_foreach_slot (vecs->pathvec, pp, i) {
1059                                 check_path(vecs, pp);
1060                         }
1061                 }
1062                 if (vecs->mpvec) {
1063                         defered_failback_tick(vecs->mpvec);
1064                         retry_count_tick(vecs->mpvec);
1065                 }
1066                 if (count)
1067                         count--;
1068                 else {
1069                         condlog(4, "map garbage collection");
1070                         mpvec_garbage_collector(vecs);
1071                         count = MAPGCINT;
1072                 }
1073
1074                 lock_cleanup_pop(vecs->lock);
1075                 pthread_sigmask(SIG_SETMASK, &old, NULL);
1076                 sleep(1);
1077         }
1078         return NULL;
1079 }
1080
1081 int
1082 configure (struct vectors * vecs, int start_waiters)
1083 {
1084         struct multipath * mpp;
1085         struct path * pp;
1086         vector mpvec;
1087         int i;
1088
1089         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1090                 return 1;
1091
1092         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1093                 return 1;
1094
1095         if (!(mpvec = vector_alloc()))
1096                 return 1;
1097
1098         /*
1099          * probe for current path (from sysfs) and map (from dm) sets
1100          */
1101         path_discovery(vecs->pathvec, conf, DI_ALL);
1102
1103         vector_foreach_slot (vecs->pathvec, pp, i){
1104                 if (filter_path(conf, pp) > 0){
1105                         vector_del_slot(vecs->pathvec, i);
1106                         free_path(pp);
1107                         i--;
1108                 }
1109                 else
1110                         pp->checkint = conf->checkint;
1111         }
1112         if (map_discovery(vecs))
1113                 return 1;
1114
1115         /*
1116          * create new set of maps & push changed ones into dm
1117          */
1118         if (coalesce_paths(vecs, mpvec, NULL, 0))
1119                 return 1;
1120
1121         /*
1122          * may need to remove some maps which are no longer relevant
1123          * e.g., due to blacklist changes in conf file
1124          */
1125         if (coalesce_maps(vecs, mpvec))
1126                 return 1;
1127
1128         dm_lib_release();
1129
1130         sync_maps_state(mpvec);
1131
1132         /*
1133          * purge dm of old maps
1134          */
1135         remove_maps(vecs);
1136
1137         /*
1138          * save new set of maps formed by considering current path state
1139          */
1140         vector_free(vecs->mpvec);
1141         vecs->mpvec = mpvec;
1142
1143         /*
1144          * start dm event waiter threads for these new maps
1145          */
1146         vector_foreach_slot(vecs->mpvec, mpp, i) {
1147                 if (setup_multipath(vecs, mpp))
1148                         return 1;
1149                 if (start_waiters)
1150                         if (start_waiter_thread(mpp, vecs))
1151                                 return 1;
1152         }
1153         return 0;
1154 }
1155
1156 int
1157 reconfigure (struct vectors * vecs)
1158 {
1159         struct config * old = conf;
1160
1161         /*
1162          * free old map and path vectors ... they use old conf state
1163          */
1164         if (VECTOR_SIZE(vecs->mpvec))
1165                 remove_maps_and_stop_waiters(vecs);
1166
1167         if (VECTOR_SIZE(vecs->pathvec))
1168                 free_pathvec(vecs->pathvec, FREE_PATHS);
1169
1170         vecs->pathvec = NULL;
1171         conf = NULL;
1172
1173         if (load_config(DEFAULT_CONFIGFILE))
1174                 return 1;
1175
1176         conf->verbosity = old->verbosity;
1177
1178         if (!conf->checkint) {
1179                 conf->checkint = DEFAULT_CHECKINT;
1180                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1181         }
1182         configure(vecs, 1);
1183         free_config(old);
1184         return 0;
1185 }
1186
1187 static struct vectors *
1188 init_vecs (void)
1189 {
1190         struct vectors * vecs;
1191
1192         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1193
1194         if (!vecs)
1195                 return NULL;
1196
1197         vecs->lock.mutex =
1198                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1199
1200         if (!vecs->lock.mutex)
1201                 goto out;
1202
1203         pthread_mutex_init(vecs->lock.mutex, NULL);
1204         vecs->lock.depth = 0;
1205
1206         return vecs;
1207
1208 out:
1209         FREE(vecs);
1210         condlog(0, "failed to init paths");
1211         return NULL;
1212 }
1213
/*
 * Install func as the handler of signal signo through sigaction(),
 * with an empty signal mask and no flags.
 *
 * Returns the handler that was installed before the call, or SIG_ERR
 * when sigaction() fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction act;
	struct sigaction prev;

	act.sa_handler = func;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(signo, &act, &prev) < 0)
		return (SIG_ERR);

	return (prev.sa_handler);
}
1232
1233 static void
1234 sighup (int sig)
1235 {
1236         condlog(2, "reconfigure (SIGHUP)");
1237
1238         lock(gvecs->lock);
1239         reconfigure(gvecs);
1240         unlock(gvecs->lock);
1241
1242 #ifdef _DEBUG_
1243         dbg_free_final(NULL);
1244 #endif
1245 }
1246
/*
 * Termination signal handler (installed for SIGINT and SIGTERM):
 * asks the daemon to exit with status 0. The signal number is unused.
 */
static void
sigend (int sig)
{
	(void)sig;
	exit_daemon(0);
}
1252
/*
 * SIGUSR1 handler: only records that the signal arrived.
 * The signal number is unused.
 */
static void
sigusr1 (int sig)
{
	(void)sig;
	condlog(3, "SIGUSR1 received");
}
1258
1259 static void
1260 signal_init(void)
1261 {
1262         signal_set(SIGHUP, sighup);
1263         signal_set(SIGUSR1, sigusr1);
1264         signal_set(SIGINT, sigend);
1265         signal_set(SIGTERM, sigend);
1266         signal(SIGPIPE, SIG_IGN);
1267 }
1268
/*
 * Ask for the SCHED_RR scheduling policy at priority 99 for the calling
 * process. A refusal is only logged; the daemon carries on with its
 * current scheduling.
 */
static void
setscheduler (void)
{
	static struct sched_param rt_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler(0, SCHED_RR, &rt_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1283
/*
 * Write val, formatted as a decimal integer, to /proc/self/oom_adj.
 * Nothing happens when the file cannot be opened for writing.
 */
static void
set_oom_adj (int val)
{
	FILE *adj = fopen("/proc/self/oom_adj", "w");

	if (adj != NULL) {
		fprintf(adj, "%i", val);
		fclose(adj);
	}
}
1297
/*
 * Initialize a thread attribute object.
 *
 * attr       attribute object to initialize
 * stacksize  requested stack size in bytes; raised to PTHREAD_STACK_MIN
 *            when smaller
 * detached   when non-zero, threads created with attr are detached
 *
 * Any failure is fatal: a message is printed on stderr and the process
 * exits with status 1.
 *
 * The pthread_attr_*() functions report failure through their return
 * value and do not set errno, so the message is built from the returned
 * error number.
 */
void
setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
{
	int rc;

	rc = pthread_attr_init(attr);
	if (rc) {
		fprintf(stderr, "can't initialize thread attr: %s\n",
			strerror(rc));
		exit(1);
	}
	if (stacksize < (size_t)PTHREAD_STACK_MIN)
		stacksize = (size_t)PTHREAD_STACK_MIN;

	rc = pthread_attr_setstacksize(attr, stacksize);
	if (rc) {
		fprintf(stderr, "can't set thread stack size to %lu: %s\n",
			(unsigned long)stacksize, strerror(rc));
		exit(1);
	}
	if (detached) {
		rc = pthread_attr_setdetachstate(attr,
						 PTHREAD_CREATE_DETACHED);
		if (rc) {
			fprintf(stderr, "can't set thread to detached: %s\n",
				strerror(rc));
			exit(1);
		}
	}
}
1321
1322 static int
1323 child (void * param)
1324 {
1325         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1326         pthread_attr_t log_attr, misc_attr;
1327         struct vectors * vecs;
1328
1329         mlockall(MCL_CURRENT | MCL_FUTURE);
1330
1331         setup_thread_attr(&misc_attr, 64 * 1024, 1);
1332         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
1333
1334         if (logsink) {
1335                 setup_thread_attr(&log_attr, 64 * 1024, 0);
1336                 log_thread_start(&log_attr);
1337                 pthread_attr_destroy(&log_attr);
1338         }
1339
1340         condlog(2, "--------start up--------");
1341         condlog(2, "read " DEFAULT_CONFIGFILE);
1342
1343         if (load_config(DEFAULT_CONFIGFILE))
1344                 exit(1);
1345
1346         if (init_checkers()) {
1347                 condlog(0, "failed to initialize checkers");
1348                 exit(1);
1349         }
1350         if (init_prio()) {
1351                 condlog(0, "failed to initialize prioritizers");
1352                 exit(1);
1353         }
1354
1355         setlogmask(LOG_UPTO(conf->verbosity + 3));
1356
1357         /*
1358          * fill the voids left in the config file
1359          */
1360         if (!conf->checkint) {
1361                 conf->checkint = DEFAULT_CHECKINT;
1362                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1363         }
1364
1365         if (conf->max_fds) {
1366                 struct rlimit fd_limit;
1367                 if (conf->max_fds > 0) {
1368                         fd_limit.rlim_cur = conf->max_fds;
1369                         fd_limit.rlim_max = conf->max_fds;
1370                 }
1371                 else {
1372                         fd_limit.rlim_cur = RLIM_INFINITY;
1373                         fd_limit.rlim_max = RLIM_INFINITY;
1374                 }
1375                 if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0)
1376                         condlog(0, "can't set open fds limit to %d : %s\n",
1377                                 conf->max_fds, strerror(errno));
1378         }
1379
1380         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1381                 if (logsink)
1382                         log_thread_stop();
1383
1384                 exit(1);
1385         }
1386         signal_init();
1387         setscheduler();
1388         set_oom_adj(-16);
1389         vecs = gvecs = init_vecs();
1390
1391         if (!vecs)
1392                 exit(1);
1393
1394         if (sysfs_init(conf->sysfs_dir, FILE_NAME_SIZE)) {
1395                 condlog(0, "can not find sysfs mount point");
1396                 exit(1);
1397         }
1398         conf->daemon = 1;
1399         /*
1400          * fetch and configure both paths and multipaths
1401          */
1402         if (configure(vecs, 1)) {
1403                 condlog(0, "failure during configuration");
1404                 exit(1);
1405         }
1406         /*
1407          * start threads
1408          */
1409         pthread_create(&check_thr, &misc_attr, checkerloop, vecs);
1410         pthread_create(&uevent_thr, &misc_attr, ueventloop, vecs);
1411         pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
1412         pthread_attr_destroy(&misc_attr);
1413
1414         pthread_cond_wait(&exit_cond, &exit_mutex);
1415
1416         /*
1417          * exit path
1418          */
1419         block_signal(SIGHUP, NULL);
1420         lock(vecs->lock);
1421         remove_maps_and_stop_waiters(vecs);
1422         free_pathvec(vecs->pathvec, FREE_PATHS);
1423
1424         pthread_cancel(check_thr);
1425         pthread_cancel(uevent_thr);
1426         pthread_cancel(uxlsnr_thr);
1427
1428         sysfs_cleanup();
1429
1430         free_keys(keys);
1431         keys = NULL;
1432         free_handlers(handlers);
1433         handlers = NULL;
1434         free_polls();
1435
1436         unlock(vecs->lock);
1437         /* Now all the waitevent threads will start rushing in. */
1438         while (vecs->lock.depth > 0) {
1439                 sleep (1); /* This is weak. */
1440                 condlog(3,"Have %d wait event checkers threads to de-alloc, waiting..\n", vecs->lock.depth);
1441         }
1442         pthread_mutex_destroy(vecs->lock.mutex);
1443         FREE(vecs->lock.mutex);
1444         vecs->lock.depth = 0;
1445         vecs->lock.mutex = NULL;
1446         FREE(vecs);
1447         vecs = NULL;
1448
1449         condlog(2, "--------shut down-------");
1450
1451         if (logsink)
1452                 log_thread_stop();
1453
1454         dm_lib_release();
1455         dm_lib_exit();
1456
1457         cleanup_prio();
1458         cleanup_checkers();
1459         /*
1460          * Freeing config must be done after condlog() and dm_lib_exit(),
1461          * because logging functions like dlog() and dm_write_log()
1462          * reference the config.
1463          */
1464         free_config(conf);
1465         conf = NULL;
1466
1467 #ifdef _DEBUG_
1468         dbg_free_final(NULL);
1469 #endif
1470
1471         exit(0);
1472 }
1473
/*
 * Detach from the invoking terminal with the usual double fork.
 *
 * Returns:
 *   -1  in the original process when the first fork fails
 *   >0  in the original process: pid of the first child
 *    0  in the process that carries on as the daemon
 *
 * In the daemon, stdin is redirected to /dev/null and stdout/stderr to
 * /dev/console, and the working directory is changed to "/". The
 * intermediate process exits as soon as the second fork has succeeded;
 * a failing second fork is only reported.
 */
static int
daemonize(void)
{
	int pid;
	int in_fd, out_fd;

	if( (pid = fork()) < 0){
		fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
		return -1;
	}
	else if (pid != 0)
		return pid;

	setsid();

	if ( (pid = fork()) < 0)
		fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
	else if (pid != 0)
		_exit(0);

	in_fd = open("/dev/null", O_RDONLY);
	if (in_fd < 0){
		fprintf(stderr, "cannot open /dev/null for input : %s\n",
			strerror(errno));
		_exit(0);
	}
	out_fd = open("/dev/console", O_WRONLY);
	if (out_fd < 0){
		fprintf(stderr, "cannot open /dev/console for output : %s\n",
			strerror(errno));
		_exit(0);
	}

	/*
	 * dup2() replaces the target descriptor atomically and does not
	 * depend on which descriptor number happens to be free, unlike a
	 * close() followed by dup(). Failures are reported but, as
	 * before, do not stop the daemon.
	 */
	if (dup2(in_fd, STDIN_FILENO) < 0)
		fprintf(stderr, "cannot redirect stdin : %s\n",
			strerror(errno));
	if (dup2(out_fd, STDOUT_FILENO) < 0)
		fprintf(stderr, "cannot redirect stdout : %s\n",
			strerror(errno));
	if (dup2(out_fd, STDERR_FILENO) < 0)
		fprintf(stderr, "cannot redirect stderr : %s\n",
			strerror(errno));

	/*
	 * if a standard descriptor was closed on entry, open() may have
	 * returned one of 0..2: never close those
	 */
	if (in_fd > STDERR_FILENO)
		close(in_fd);
	if (out_fd > STDERR_FILENO)
		close(out_fd);

	if (chdir("/") < 0)
		fprintf(stderr, "cannot chdir to '/', continuing\n");

	return 0;
}
1521
1522 int
1523 main (int argc, char *argv[])
1524 {
1525         extern char *optarg;
1526         extern int optind;
1527         int arg;
1528         int err;
1529
1530         logsink = 1;
1531         dm_init();
1532
1533         if (getuid() != 0) {
1534                 fprintf(stderr, "need to be root\n");
1535                 exit(1);
1536         }
1537
1538         /* make sure we don't lock any path */
1539         chdir("/");
1540         umask(umask(077) | 022);
1541
1542         conf = alloc_config();
1543
1544         if (!conf)
1545                 exit(1);
1546
1547         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1548         switch(arg) {
1549                 case 'd':
1550                         logsink = 0;
1551                         //debug=1; /* ### comment me out ### */
1552                         break;
1553                 case 'v':
1554                         if (sizeof(optarg) > sizeof(char *) ||
1555                             !isdigit(optarg[0]))
1556                                 exit(1);
1557
1558                         conf->verbosity = atoi(optarg);
1559                         break;
1560                 case 'k':
1561                         uxclnt(optarg);
1562                         exit(0);
1563                 default:
1564                         ;
1565                 }
1566         }
1567         if (optind < argc) {
1568                 char cmd[CMDSIZE];
1569                 char * s = cmd;
1570                 char * c = s;
1571
1572                 while (optind < argc) {
1573                         if (strchr(argv[optind], ' '))
1574                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
1575                         else
1576                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
1577                         optind++;
1578                 }
1579                 c += snprintf(c, s + CMDSIZE - c, "\n");
1580                 uxclnt(s);
1581                 exit(0);
1582         }
1583
1584         if (!logsink)
1585                 err = 0;
1586         else
1587                 err = daemonize();
1588
1589         if (err < 0)
1590                 /* error */
1591                 exit(1);
1592         else if (err > 0)
1593                 /* parent dies */
1594                 exit(0);
1595         else
1596                 /* child lives */
1597                 return (child(NULL));
1598 }
1599