multipath-tools 0.6.2: multipathd/main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18 #include <linux/oom.h>
19 #include <libudev.h>
20 #include <urcu.h>
21 #ifdef USE_SYSTEMD
22 #include <systemd/sd-daemon.h>
23 #endif
24 #include <semaphore.h>
25 #include <time.h>
26
27 /*
28  * libcheckers
29  */
30 #include "checkers.h"
31
32 #ifdef USE_SYSTEMD
33 static int use_watchdog;
34 #endif
35
36 int uxsock_timeout;
37
38 /*
39  * libmultipath
40  */
41 #include "parser.h"
42 #include "vector.h"
43 #include "memory.h"
44 #include "config.h"
45 #include "util.h"
46 #include "hwtable.h"
47 #include "defaults.h"
48 #include "structs.h"
49 #include "blacklist.h"
50 #include "structs_vec.h"
51 #include "dmparser.h"
52 #include "devmapper.h"
53 #include "sysfs.h"
54 #include "dict.h"
55 #include "discovery.h"
56 #include "debug.h"
57 #include "propsel.h"
58 #include "uevent.h"
59 #include "switchgroup.h"
60 #include "print.h"
61 #include "configure.h"
62 #include "prio.h"
63 #include "wwids.h"
64 #include "pgpolicies.h"
65 #include "uevent.h"
66 #include "log.h"
67
68 #include "mpath_cmd.h"
69 #include "mpath_persist.h"
70
71 #include "prioritizers/alua_rtpg.h"
72
73 #include "main.h"
74 #include "pidfile.h"
75 #include "uxlsnr.h"
76 #include "uxclnt.h"
77 #include "cli.h"
78 #include "cli_handlers.h"
79 #include "lock.h"
80 #include "waiter.h"
81 #include "wwids.h"
82
83 #define FILE_NAME_SIZE 256
84 #define CMDSIZE 160
85
86 #define LOG_MSG(a, b) \
87 do { \
88         if (pp->offline) \
89                 condlog(a, "%s: %s - path offline", pp->mpp->alias, pp->dev); \
90         else if (strlen(b)) \
91                 condlog(a, "%s: %s - %s", pp->mpp->alias, pp->dev, b); \
92 } while(0)
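
/*
 * Note: LOG_MSG() expects a local "struct path *pp" to be in scope at the
 * call site; it dereferences pp->offline, pp->mpp->alias and pp->dev
 * directly.  A typical use, as in the path checker further down, is:
 *
 *     LOG_MSG(1, checker_message(&pp->checker));
 */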
93
94 struct mpath_event_param
95 {
96         char * devname;
97         struct multipath *mpp;
98 };
99
100 unsigned int mpath_mx_alloc_len;
101
102 int logsink;
103 int verbosity;
104 int bindings_read_only;
105 int ignore_new_devs;
106 enum daemon_status running_state = DAEMON_INIT;
107 pid_t daemon_pid;
108 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
109 pthread_cond_t config_cond = PTHREAD_COND_INITIALIZER;
110
111 /*
112  * global copy of vecs for use in sig handlers
113  */
114 struct vectors * gvecs;
115
116 struct udev * udev;
117
118 struct config *multipath_conf;
119
120 const char *
121 daemon_status(void)
122 {
123         switch (running_state) {
124         case DAEMON_INIT:
125                 return "init";
126         case DAEMON_START:
127                 return "startup";
128         case DAEMON_CONFIGURE:
129                 return "configure";
130         case DAEMON_IDLE:
131                 return "idle";
132         case DAEMON_RUNNING:
133                 return "running";
134         case DAEMON_SHUTDOWN:
135                 return "shutdown";
136         }
137         return NULL;
138 }
139
140 /*
141  * I love you too, systemd ...
142  */
143 const char *
144 sd_notify_status(void)
145 {
146         switch (running_state) {
147         case DAEMON_INIT:
148                 return "STATUS=init";
149         case DAEMON_START:
150                 return "STATUS=startup";
151         case DAEMON_CONFIGURE:
152                 return "STATUS=configure";
153         case DAEMON_IDLE:
154                 return "STATUS=idle";
155         case DAEMON_RUNNING:
156                 return "STATUS=running";
157         case DAEMON_SHUTDOWN:
158                 return "STATUS=shutdown";
159         }
160         return NULL;
161 }
162
163 static void config_cleanup(void *arg)
164 {
165         pthread_mutex_unlock(&config_lock);
166 }
167
168 void post_config_state(enum daemon_status state)
169 {
170         pthread_mutex_lock(&config_lock);
171         if (state != running_state) {
172                 running_state = state;
173                 pthread_cond_broadcast(&config_cond);
174 #ifdef USE_SYSTEMD
175                 sd_notify(0, sd_notify_status());
176 #endif
177         }
178         pthread_mutex_unlock(&config_lock);
179 }
180
181 int set_config_state(enum daemon_status state)
182 {
183         int rc = 0;
184
185         pthread_cleanup_push(config_cleanup, NULL);
186         pthread_mutex_lock(&config_lock);
187         if (running_state != state) {
188                 if (running_state != DAEMON_IDLE) {
189                         struct timespec ts;
190
191                         clock_gettime(CLOCK_REALTIME, &ts);
192                         ts.tv_sec += 1;
193                         rc = pthread_cond_timedwait(&config_cond,
194                                                     &config_lock, &ts);
195                 }
196                 if (!rc) {
197                         running_state = state;
198                         pthread_cond_broadcast(&config_cond);
199 #ifdef USE_SYSTEMD
200                         sd_notify(0, sd_notify_status());
201 #endif
202                 }
203         }
204         pthread_cleanup_pop(1);
205         return rc;
206 }
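
/*
 * post_config_state() switches running_state unconditionally, while
 * set_config_state() waits up to one second for the daemon to leave its
 * current (non-idle) state before applying the requested one, and returns
 * the pthread_cond_timedwait() error otherwise.  Both broadcast config_cond
 * and, when built with systemd support, report the new status to systemd.
 * A typical caller in this file requests a reconfiguration with:
 *
 *     set_config_state(DAEMON_CONFIGURE);
 */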
207
208 struct config *get_multipath_config(void)
209 {
210         rcu_read_lock();
211         return rcu_dereference(multipath_conf);
212 }
213
214 void put_multipath_config(struct config *conf)
215 {
216         rcu_read_unlock();
217 }
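
/*
 * Access to the active configuration follows an RCU read-side pattern:
 * get_multipath_config() enters an RCU read-side critical section and
 * returns the current config, and put_multipath_config() leaves it again.
 * Every option lookup in this file is therefore bracketed like:
 *
 *     struct config *conf = get_multipath_config();
 *     ... read conf->checkint, conf->reassign_maps, ...
 *     put_multipath_config(conf);
 */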
218
219 static int
220 need_switch_pathgroup (struct multipath * mpp, int refresh)
221 {
222         struct pathgroup * pgp;
223         struct path * pp;
224         unsigned int i, j;
225         struct config *conf;
226
227         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
228                 return 0;
229
230         /*
231          * Refresh path priority values
232          */
233         if (refresh) {
234                 vector_foreach_slot (mpp->pg, pgp, i) {
235                         vector_foreach_slot (pgp->paths, pp, j) {
236                                 conf = get_multipath_config();
237                                 pathinfo(pp, conf, DI_PRIO);
238                                 put_multipath_config(conf);
239                         }
240                 }
241         }
242
243         if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
244                 return 0;
245
246         mpp->bestpg = select_path_group(mpp);
247
248         if (mpp->bestpg != mpp->nextpg)
249                 return 1;
250
251         return 0;
252 }
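
/*
 * need_switch_pathgroup() optionally refreshes path priorities (DI_PRIO),
 * recomputes mpp->bestpg and returns 1 when it differs from the path group
 * the kernel will use next (mpp->nextpg); callers then invoke
 * switch_pathgroup() below to actually switch over.
 */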
253
254 static void
255 switch_pathgroup (struct multipath * mpp)
256 {
257         mpp->stat_switchgroup++;
258         dm_switchgroup(mpp->alias, mpp->bestpg);
259         condlog(2, "%s: switch to path group #%i",
260                  mpp->alias, mpp->bestpg);
261 }
262
263 static int
264 coalesce_maps(struct vectors *vecs, vector nmpv)
265 {
266         struct multipath * ompp;
267         vector ompv = vecs->mpvec;
268         unsigned int i, reassign_maps;
269         struct config *conf;
270
271         conf = get_multipath_config();
272         reassign_maps = conf->reassign_maps;
273         put_multipath_config(conf);
274         vector_foreach_slot (ompv, ompp, i) {
275                 condlog(3, "%s: coalesce map", ompp->alias);
276                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
277                         /*
278                          * remove all current maps not allowed by the
279                          * current configuration
280                          */
281                         if (dm_flush_map(ompp->alias)) {
282                                 condlog(0, "%s: unable to flush devmap",
283                                         ompp->alias);
284                                 /*
285                                  * may be just because the device is open
286                                  */
287                                 if (setup_multipath(vecs, ompp) != 0) {
288                                         i--;
289                                         continue;
290                                 }
291                                 if (!vector_alloc_slot(nmpv))
292                                         return 1;
293
294                                 vector_set_slot(nmpv, ompp);
295
296                                 vector_del_slot(ompv, i);
297                                 i--;
298                         }
299                         else {
300                                 dm_lib_release();
301                                 condlog(2, "%s devmap removed", ompp->alias);
302                         }
303                 } else if (reassign_maps) {
304                         condlog(3, "%s: Reassign existing device-mapper"
305                                 " devices", ompp->alias);
306                         dm_reassign(ompp->alias);
307                 }
308         }
309         return 0;
310 }
311
312 void
313 sync_map_state(struct multipath *mpp)
314 {
315         struct pathgroup *pgp;
316         struct path *pp;
317         unsigned int i, j;
318
319         if (!mpp->pg)
320                 return;
321
322         vector_foreach_slot (mpp->pg, pgp, i){
323                 vector_foreach_slot (pgp->paths, pp, j){
324                         if (pp->state == PATH_UNCHECKED ||
325                             pp->state == PATH_WILD ||
326                             pp->state == PATH_DELAYED)
327                                 continue;
328                         if ((pp->dmstate == PSTATE_FAILED ||
329                              pp->dmstate == PSTATE_UNDEF) &&
330                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
331                                 dm_reinstate_path(mpp->alias, pp->dev_t);
332                         else if ((pp->dmstate == PSTATE_ACTIVE ||
333                                   pp->dmstate == PSTATE_UNDEF) &&
334                                  (pp->state == PATH_DOWN ||
335                                   pp->state == PATH_SHAKY))
336                                 dm_fail_path(mpp->alias, pp->dev_t);
337                 }
338         }
339 }
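
/*
 * sync_map_state() reconciles the checker's view of each path with the
 * kernel device-mapper state: paths the checker considers usable (PATH_UP
 * or PATH_GHOST) but that dm has failed are reinstated, and paths the
 * checker considers down or shaky but that dm still treats as active are
 * failed.  Unchecked, wild and delayed paths are skipped.
 */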
340
341 static void
342 sync_maps_state(vector mpvec)
343 {
344         unsigned int i;
345         struct multipath *mpp;
346
347         vector_foreach_slot (mpvec, mpp, i)
348                 sync_map_state(mpp);
349 }
350
351 static int
352 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
353 {
354         int r;
355
356         if (nopaths)
357                 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
358         else
359                 r = dm_flush_map(mpp->alias);
360         /*
361          * on success we clear our references to this map below, so the
362          * spurious uevent from the dm_flush_map call above can be ignored
363          */
364         if (r) {
365                 /*
366                  * May not really be an error -- if the map was already flushed
367                  * from the device mapper by dmsetup(8) for instance.
368                  */
369                 if (r == 1)
370                         condlog(0, "%s: can't flush", mpp->alias);
371                 else {
372                         condlog(2, "%s: devmap deferred remove", mpp->alias);
373                         mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
374                 }
375                 return r;
376         }
377         else {
378                 dm_lib_release();
379                 condlog(2, "%s: map flushed", mpp->alias);
380         }
381
382         orphan_paths(vecs->pathvec, mpp);
383         remove_map_and_stop_waiter(mpp, vecs, 1);
384
385         return 0;
386 }
387
388 int
389 update_map (struct multipath *mpp, struct vectors *vecs)
390 {
391         int retries = 3;
392         char params[PARAMS_SIZE] = {0};
393
394 retry:
395         condlog(4, "%s: updating new map", mpp->alias);
396         if (adopt_paths(vecs->pathvec, mpp)) {
397                 condlog(0, "%s: failed to adopt paths for new map update",
398                         mpp->alias);
399                 retries = -1;
400                 goto fail;
401         }
402         verify_paths(mpp, vecs);
403         mpp->flush_on_last_del = FLUSH_UNDEF;
404         mpp->action = ACT_RELOAD;
405
406         if (setup_map(mpp, params, PARAMS_SIZE)) {
407                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
408                 retries = -1;
409                 goto fail;
410         }
411         if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
412                 condlog(0, "%s: map_udate sleep", mpp->alias);
413                 sleep(1);
414                 goto retry;
415         }
416         dm_lib_release();
417
418 fail:
419         if (setup_multipath(vecs, mpp))
420                 return 1;
421
422         sync_map_state(mpp);
423
424         if (retries < 0)
425                 condlog(0, "%s: failed reload in new map update", mpp->alias);
426         return 0;
427 }
428
429 static int
430 uev_add_map (struct uevent * uev, struct vectors * vecs)
431 {
432         char *alias;
433         int major = -1, minor = -1, rc;
434
435         condlog(3, "%s: add map (uevent)", uev->kernel);
436         alias = uevent_get_dm_name(uev);
437         if (!alias) {
438                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
439                 major = uevent_get_major(uev);
440                 minor = uevent_get_minor(uev);
441                 alias = dm_mapname(major, minor);
442                 if (!alias) {
443                         condlog(2, "%s: mapname not found for %d:%d",
444                                 uev->kernel, major, minor);
445                         return 1;
446                 }
447         }
448         pthread_cleanup_push(cleanup_lock, &vecs->lock);
449         lock(vecs->lock);
450         pthread_testcancel();
451         rc = ev_add_map(uev->kernel, alias, vecs);
452         lock_cleanup_pop(vecs->lock);
453         FREE(alias);
454         return rc;
455 }
456
457 int
458 ev_add_map (char * dev, char * alias, struct vectors * vecs)
459 {
460         char * refwwid;
461         struct multipath * mpp;
462         int map_present;
463         int r = 1, delayed_reconfig, reassign_maps;
464         struct config *conf;
465
466         map_present = dm_map_present(alias);
467
468         if (map_present && !dm_is_mpath(alias)) {
469                 condlog(4, "%s: not a multipath map", alias);
470                 return 0;
471         }
472
473         mpp = find_mp_by_alias(vecs->mpvec, alias);
474
475         if (mpp) {
476                 if (mpp->wait_for_udev > 1) {
477                         if (update_map(mpp, vecs))
478                                 /* setup_multipath removed the map */
479                                 return 1;
480                 }
481                 conf = get_multipath_config();
482                 delayed_reconfig = conf->delayed_reconfig;
483                 reassign_maps = conf->reassign_maps;
484                 put_multipath_config(conf);
485                 if (mpp->wait_for_udev) {
486                         mpp->wait_for_udev = 0;
487                         if (delayed_reconfig &&
488                             !need_to_delay_reconfig(vecs)) {
489                                 condlog(2, "reconfigure (delayed)");
490                                 set_config_state(DAEMON_CONFIGURE);
491                                 return 0;
492                         }
493                 }
494                 /*
495                  * Not really an error -- we generate our own uevent
496                  * if we create a multipath mapped device as a result
497                  * of uev_add_path
498                  */
499                 if (reassign_maps) {
500                         condlog(3, "%s: Reassign existing device-mapper devices",
501                                 alias);
502                         dm_reassign(alias);
503                 }
504                 return 0;
505         }
506         condlog(2, "%s: adding map", alias);
507
508         /*
509          * now we can register the map
510          */
511         if (map_present) {
512                 if ((mpp = add_map_without_path(vecs, alias))) {
513                         sync_map_state(mpp);
514                         condlog(2, "%s: devmap %s registered", alias, dev);
515                         return 0;
516                 } else {
517                         condlog(2, "%s: uev_add_map failed", dev);
518                         return 1;
519                 }
520         }
521         r = get_refwwid(CMD_NONE, dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
522
523         if (refwwid) {
524                 r = coalesce_paths(vecs, NULL, refwwid, 0, CMD_NONE);
525                 dm_lib_release();
526         }
527
528         if (!r)
529                 condlog(2, "%s: devmap %s added", alias, dev);
530         else if (r == 2)
531                 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
532         else
533                 condlog(0, "%s: uev_add_map %s failed", alias, dev);
534
535         FREE(refwwid);
536         return r;
537 }
538
539 static int
540 uev_remove_map (struct uevent * uev, struct vectors * vecs)
541 {
542         char *alias;
543         int minor;
544         struct multipath *mpp;
545
546         condlog(2, "%s: remove map (uevent)", uev->kernel);
547         alias = uevent_get_dm_name(uev);
548         if (!alias) {
549                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
550                 return 0;
551         }
552         minor = uevent_get_minor(uev);
553
554         pthread_cleanup_push(cleanup_lock, &vecs->lock);
555         lock(vecs->lock);
556         pthread_testcancel();
557         mpp = find_mp_by_minor(vecs->mpvec, minor);
558
559         if (!mpp) {
560                 condlog(2, "%s: devmap not registered, can't remove",
561                         uev->kernel);
562                 goto out;
563         }
564         if (strcmp(mpp->alias, alias)) {
565                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
566                         mpp->alias, mpp->dmi->minor, minor);
567                 goto out;
568         }
569
570         orphan_paths(vecs->pathvec, mpp);
571         remove_map_and_stop_waiter(mpp, vecs, 1);
572 out:
573         lock_cleanup_pop(vecs->lock);
574         FREE(alias);
575         return 0;
576 }
577
578 /* Called from CLI handler */
579 int
580 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
581 {
582         struct multipath * mpp;
583
584         mpp = find_mp_by_minor(vecs->mpvec, minor);
585
586         if (!mpp) {
587                 condlog(2, "%s: devmap not registered, can't remove",
588                         devname);
589                 return 1;
590         }
591         if (strcmp(mpp->alias, alias)) {
592                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
593                         mpp->alias, mpp->dmi->minor, minor);
594                 return 1;
595         }
596         return flush_map(mpp, vecs, 0);
597 }
598
599 static int
600 uev_add_path (struct uevent *uev, struct vectors * vecs)
601 {
602         struct path *pp;
603         int ret = 0, i;
604         struct config *conf;
605
606         condlog(2, "%s: add path (uevent)", uev->kernel);
607         if (strstr(uev->kernel, "..") != NULL) {
608                 /*
609                  * Don't allow relative device names in the pathvec
610                  */
611                 condlog(0, "%s: path name is invalid", uev->kernel);
612                 return 1;
613         }
614
615         pthread_cleanup_push(cleanup_lock, &vecs->lock);
616         lock(vecs->lock);
617         pthread_testcancel();
618         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
619         if (pp) {
620                 int r;
621
622                 condlog(0, "%s: spurious uevent, path already in pathvec",
623                         uev->kernel);
624                 if (!pp->mpp && !strlen(pp->wwid)) {
625                         condlog(3, "%s: reinitialize path", uev->kernel);
626                         udev_device_unref(pp->udev);
627                         pp->udev = udev_device_ref(uev->udev);
628                         conf = get_multipath_config();
629                         r = pathinfo(pp, conf,
630                                      DI_ALL | DI_BLACKLIST);
631                         put_multipath_config(conf);
632                         if (r == PATHINFO_OK)
633                                 ret = ev_add_path(pp, vecs);
634                         else if (r == PATHINFO_SKIPPED) {
635                                 condlog(3, "%s: remove blacklisted path",
636                                         uev->kernel);
637                                 i = find_slot(vecs->pathvec, (void *)pp);
638                                 if (i != -1)
639                                         vector_del_slot(vecs->pathvec, i);
640                                 free_path(pp);
641                         } else {
642                                 condlog(0, "%s: failed to reinitialize path",
643                                         uev->kernel);
644                                 ret = 1;
645                         }
646                 }
647         }
648         lock_cleanup_pop(vecs->lock);
649         if (pp)
650                 return ret;
651
652         /*
653          * get path vital state
654          */
655         conf = get_multipath_config();
656         ret = alloc_path_with_pathinfo(conf, uev->udev,
657                                        DI_ALL, &pp);
658         put_multipath_config(conf);
659         if (!pp) {
660                 if (ret == PATHINFO_SKIPPED)
661                         return 0;
662                 condlog(3, "%s: failed to get path info", uev->kernel);
663                 return 1;
664         }
665         pthread_cleanup_push(cleanup_lock, &vecs->lock);
666         lock(vecs->lock);
667         pthread_testcancel();
668         ret = store_path(vecs->pathvec, pp);
669         if (!ret) {
670                 conf = get_multipath_config();
671                 pp->checkint = conf->checkint;
672                 put_multipath_config(conf);
673                 ret = ev_add_path(pp, vecs);
674         } else {
675                 condlog(0, "%s: failed to store path info, "
676                         "dropping event",
677                         uev->kernel);
678                 free_path(pp);
679                 ret = 1;
680         }
681         lock_cleanup_pop(vecs->lock);
682         return ret;
683 }
684
685 /*
686  * returns:
687  * 0: added
688  * 1: error
689  */
690 int
691 ev_add_path (struct path * pp, struct vectors * vecs)
692 {
693         struct multipath * mpp;
694         char params[PARAMS_SIZE] = {0};
695         int retries = 3;
696         int start_waiter = 0;
697         int ret;
698
699         /*
700          * need path UID to go any further
701          */
702         if (strlen(pp->wwid) == 0) {
703                 condlog(0, "%s: failed to get path uid", pp->dev);
704                 goto fail; /* leave path added to pathvec */
705         }
706         mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
707         if (mpp && mpp->wait_for_udev) {
708                 mpp->wait_for_udev = 2;
709                 orphan_path(pp, "waiting for create to complete");
710                 return 0;
711         }
712
713         pp->mpp = mpp;
714 rescan:
715         if (mpp) {
716                 if (pp->size && mpp->size != pp->size) {
717                         condlog(0, "%s: failed to add new path %s, "
718                                 "device size mismatch",
719                                 mpp->alias, pp->dev);
720                         int i = find_slot(vecs->pathvec, (void *)pp);
721                         if (i != -1)
722                                 vector_del_slot(vecs->pathvec, i);
723                         free_path(pp);
724                         return 1;
725                 }
726
727                 condlog(4,"%s: adopting all paths for path %s",
728                         mpp->alias, pp->dev);
729                 if (adopt_paths(vecs->pathvec, mpp))
730                         goto fail; /* leave path added to pathvec */
731
732                 verify_paths(mpp, vecs);
733                 mpp->flush_on_last_del = FLUSH_UNDEF;
734                 mpp->action = ACT_RELOAD;
735         } else {
736                 if (!should_multipath(pp, vecs->pathvec)) {
737                         orphan_path(pp, "only one path");
738                         return 0;
739                 }
740                 condlog(4,"%s: creating new map", pp->dev);
741                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
742                         mpp->action = ACT_CREATE;
743                         /*
744                          * We don't depend on ACT_CREATE, as domap will
745                          * set it to ACT_NOTHING when complete.
746                          */
747                         start_waiter = 1;
748                 }
749                 if (!start_waiter)
750                         goto fail; /* leave path added to pathvec */
751         }
752
753         /* persistent reservation check */
754         mpath_pr_event_handle(pp);
755
756         /*
757          * push the map to the device-mapper
758          */
759         if (setup_map(mpp, params, PARAMS_SIZE)) {
760                 condlog(0, "%s: failed to setup map for addition of new "
761                         "path %s", mpp->alias, pp->dev);
762                 goto fail_map;
763         }
764         /*
765          * reload the map for the multipath mapped device
766          */
767 retry:
768         ret = domap(mpp, params, 1);
769         if (ret <= 0) {
770                 if (ret < 0 && retries-- > 0) {
771                         condlog(0, "%s: retry domap for addition of new "
772                                 "path %s", mpp->alias, pp->dev);
773                         sleep(1);
774                         goto retry;
775                 }
776                 condlog(0, "%s: failed in domap for addition of new "
777                         "path %s", mpp->alias, pp->dev);
778                 /*
779                  * deal with asynchronous uevents :((
780                  */
781                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
782                         condlog(0, "%s: ev_add_path sleep", mpp->alias);
783                         sleep(1);
784                         update_mpp_paths(mpp, vecs->pathvec);
785                         goto rescan;
786                 }
787                 else if (mpp->action == ACT_RELOAD)
788                         condlog(0, "%s: giving up reload", mpp->alias);
789                 else
790                         goto fail_map;
791         }
792         dm_lib_release();
793
794         /*
795          * update our state from kernel regardless of create or reload
796          */
797         if (setup_multipath(vecs, mpp))
798                 goto fail; /* if setup_multipath fails, it removes the map */
799
800         sync_map_state(mpp);
801
802         if ((mpp->action == ACT_CREATE ||
803              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
804             start_waiter_thread(mpp, vecs))
805                         goto fail_map;
806
807         if (retries >= 0) {
808                 condlog(2, "%s [%s]: path added to devmap %s",
809                         pp->dev, pp->dev_t, mpp->alias);
810                 return 0;
811         } else
812                 goto fail;
813
814 fail_map:
815         remove_map(mpp, vecs, 1);
816 fail:
817         orphan_path(pp, "failed to add path");
818         return 1;
819 }
820
821 static int
822 uev_remove_path (struct uevent *uev, struct vectors * vecs)
823 {
824         struct path *pp;
825         int ret;
826
827         condlog(2, "%s: remove path (uevent)", uev->kernel);
828         pthread_cleanup_push(cleanup_lock, &vecs->lock);
829         lock(vecs->lock);
830         pthread_testcancel();
831         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
832         if (pp)
833                 ret = ev_remove_path(pp, vecs);
834         lock_cleanup_pop(vecs->lock);
835         if (!pp) {
836                 /* Not an error; path might have been purged earlier */
837                 condlog(0, "%s: path already removed", uev->kernel);
838                 return 0;
839         }
840         return ret;
841 }
842
843 int
844 ev_remove_path (struct path *pp, struct vectors * vecs)
845 {
846         struct multipath * mpp;
847         int i, retval = 0;
848         char params[PARAMS_SIZE] = {0};
849
850         /*
851          * avoid referring to the map of an orphaned path
852          */
853         if ((mpp = pp->mpp)) {
854                 /*
855                  * transform the mp->pg vector of vectors of paths
856                  * into a mp->params string to feed the device-mapper
857                  */
858                 if (update_mpp_paths(mpp, vecs->pathvec)) {
859                         condlog(0, "%s: failed to update paths",
860                                 mpp->alias);
861                         goto fail;
862                 }
863                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
864                         vector_del_slot(mpp->paths, i);
865
866                 /*
867                  * remove the map IFF removing the last path
868                  */
869                 if (VECTOR_SIZE(mpp->paths) == 0) {
870                         char alias[WWID_SIZE];
871
872                         /*
873                          * flush_map will fail if the device is open
874                          */
875                         strncpy(alias, mpp->alias, WWID_SIZE);
876                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
877                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
878                                 mpp->retry_tick = 0;
879                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
880                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
881                                 dm_queue_if_no_path(mpp->alias, 0);
882                         }
883                         if (!flush_map(mpp, vecs, 1)) {
884                                 condlog(2, "%s: removed map after"
885                                         " removing all paths",
886                                         alias);
887                                 retval = 0;
888                                 goto out;
889                         }
890                         /*
891                          * Not an error, continue
892                          */
893                 }
894
895                 if (setup_map(mpp, params, PARAMS_SIZE)) {
896                         condlog(0, "%s: failed to setup map for"
897                                 " removal of path %s", mpp->alias, pp->dev);
898                         goto fail;
899                 }
900
901                 if (mpp->wait_for_udev) {
902                         mpp->wait_for_udev = 2;
903                         goto out;
904                 }
905
906                 /*
907                  * reload the map
908                  */
909                 mpp->action = ACT_RELOAD;
910                 if (domap(mpp, params, 1) <= 0) {
911                         condlog(0, "%s: failed in domap for "
912                                 "removal of path %s",
913                                 mpp->alias, pp->dev);
914                         retval = 1;
915                 } else {
916                         /*
917                          * update our state from kernel
918                          */
919                         if (setup_multipath(vecs, mpp))
920                                 return 1;
921                         sync_map_state(mpp);
922
923                         condlog(2, "%s [%s]: path removed from map %s",
924                                 pp->dev, pp->dev_t, mpp->alias);
925                 }
926         }
927
928 out:
929         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
930                 vector_del_slot(vecs->pathvec, i);
931
932         free_path(pp);
933
934         return retval;
935
936 fail:
937         remove_map_and_stop_waiter(mpp, vecs, 1);
938         return 1;
939 }
940
941 static int
942 uev_update_path (struct uevent *uev, struct vectors * vecs)
943 {
944         int ro, retval = 0;
945
946         ro = uevent_get_disk_ro(uev);
947
948         if (ro >= 0) {
949                 struct path * pp;
950                 struct multipath *mpp = NULL;
951
952                 condlog(2, "%s: update path write_protect to '%d' (uevent)",
953                         uev->kernel, ro);
954                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
955                 lock(vecs->lock);
956                 pthread_testcancel();
957                 /*
958                  * pthread_mutex_lock() and pthread_mutex_unlock()
959                  * need to be at the same indentation level, hence
960                  * this slightly convoluted codepath.
961                  */
962                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
963                 if (pp) {
964                         if (pp->initialized == INIT_REQUESTED_UDEV) {
965                                 retval = 2;
966                         } else {
967                                 mpp = pp->mpp;
968                                 if (mpp && mpp->wait_for_udev) {
969                                         mpp->wait_for_udev = 2;
970                                         mpp = NULL;
971                                         retval = 0;
972                                 }
973                         }
974                         if (mpp) {
975                                 retval = reload_map(vecs, mpp, 0, 1);
976
977                                 condlog(2, "%s: map %s reloaded (retval %d)",
978                                         uev->kernel, mpp->alias, retval);
979                         }
980                 }
981                 lock_cleanup_pop(vecs->lock);
982                 if (!pp) {
983                         condlog(0, "%s: spurious uevent, path not found",
984                                 uev->kernel);
985                         return 1;
986                 }
987                 if (retval == 2)
988                         return uev_add_path(uev, vecs);
989         }
990
991         return retval;
992 }
993
994 static int
995 map_discovery (struct vectors * vecs)
996 {
997         struct multipath * mpp;
998         unsigned int i;
999
1000         if (dm_get_maps(vecs->mpvec))
1001                 return 1;
1002
1003         vector_foreach_slot (vecs->mpvec, mpp, i)
1004                 if (setup_multipath(vecs, mpp))
1005                         return 1;
1006
1007         return 0;
1008 }
1009
1010 int
1011 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
1012 {
1013         struct vectors * vecs;
1014         int r;
1015
1016         *reply = NULL;
1017         *len = 0;
1018         vecs = (struct vectors *)trigger_data;
1019
1020         r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1021
1022         if (r > 0) {
1023                 if (r == ETIMEDOUT)
1024                         *reply = STRDUP("timeout\n");
1025                 else
1026                         *reply = STRDUP("fail\n");
1027                 *len = strlen(*reply) + 1;
1028                 r = 1;
1029         }
1030         else if (!r && *len == 0) {
1031                 *reply = STRDUP("ok\n");
1032                 *len = strlen(*reply) + 1;
1033                 r = 0;
1034         }
1035         /* else if (r < 0) leave *reply alone */
1036
1037         return r;
1038 }
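
/*
 * Reply convention for uxsock_trigger(): a positive return from parse_cmd()
 * becomes a "timeout\n" or "fail\n" reply, a zero return with an empty
 * reply becomes "ok\n", and in the remaining cases (zero with data, or a
 * negative return) the reply produced by the command handler is passed
 * back to the client unchanged.
 */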
1039
1040 static int
1041 uev_discard(char * devpath)
1042 {
1043         char *tmp;
1044         char a[11], b[11];
1045
1046         /*
1047          * keep only block devices, discard partitions
1048          */
1049         tmp = strstr(devpath, "/block/");
1050         if (tmp == NULL){
1051                 condlog(4, "no /block/ in '%s'", devpath);
1052                 return 1;
1053         }
1054         if (sscanf(tmp, "/block/%10s", a) != 1 ||
1055             sscanf(tmp, "/block/%10[^/]/%10s", a, b) == 2) {
1056                 condlog(4, "discard event on %s", devpath);
1057                 return 1;
1058         }
1059         return 0;
1060 }
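
/*
 * uev_discard() keeps uevents for whole block devices only.  For example,
 * a devpath ending in ".../block/sda" is processed, while a partition such
 * as ".../block/sda/sda1", or any devpath without a "/block/" component,
 * is discarded.
 */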
1061
1062 int
1063 uev_trigger (struct uevent * uev, void * trigger_data)
1064 {
1065         int r = 0;
1066         struct vectors * vecs;
1067         struct config *conf;
1068
1069         vecs = (struct vectors *)trigger_data;
1070
1071         if (uev_discard(uev->devpath))
1072                 return 0;
1073
1074         pthread_cleanup_push(config_cleanup, NULL);
1075         pthread_mutex_lock(&config_lock);
1076         if (running_state != DAEMON_IDLE &&
1077             running_state != DAEMON_RUNNING)
1078                 pthread_cond_wait(&config_cond, &config_lock);
1079         pthread_cleanup_pop(1);
1080
1081         if (running_state == DAEMON_SHUTDOWN)
1082                 return 0;
1083
1084         /*
1085          * device map event
1086          * Add events are ignored here as the tables
1087          * are not fully initialised then.
1088          */
1089         if (!strncmp(uev->kernel, "dm-", 3)) {
1090                 if (!strncmp(uev->action, "change", 6)) {
1091                         r = uev_add_map(uev, vecs);
1092                         goto out;
1093                 }
1094                 if (!strncmp(uev->action, "remove", 6)) {
1095                         r = uev_remove_map(uev, vecs);
1096                         goto out;
1097                 }
1098                 goto out;
1099         }
1100
1101         /*
1102          * path add/remove event
1103          */
1104         conf = get_multipath_config();
1105         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
1106                            uev->kernel) > 0) {
1107                 put_multipath_config(conf);
1108                 goto out;
1109         }
1110         put_multipath_config(conf);
1111
1112         if (!strncmp(uev->action, "add", 3)) {
1113                 r = uev_add_path(uev, vecs);
1114                 goto out;
1115         }
1116         if (!strncmp(uev->action, "remove", 6)) {
1117                 r = uev_remove_path(uev, vecs);
1118                 goto out;
1119         }
1120         if (!strncmp(uev->action, "change", 6)) {
1121                 r = uev_update_path(uev, vecs);
1122                 goto out;
1123         }
1124
1125 out:
1126         return r;
1127 }
1128
1129 static void *rcu_unregister(void *param)
1130 {
1131         rcu_unregister_thread();
1132         return NULL;
1133 }
1134
1135 static void *
1136 ueventloop (void * ap)
1137 {
1138         struct udev *udev = ap;
1139
1140         pthread_cleanup_push(rcu_unregister, NULL);
1141         rcu_register_thread();
1142         if (uevent_listen(udev))
1143                 condlog(0, "error starting uevent listener");
1144         pthread_cleanup_pop(1);
1145         return NULL;
1146 }
1147
1148 static void *
1149 uevqloop (void * ap)
1150 {
1151         pthread_cleanup_push(rcu_unregister, NULL);
1152         rcu_register_thread();
1153         if (uevent_dispatch(&uev_trigger, ap))
1154                 condlog(0, "error starting uevent dispatcher");
1155         pthread_cleanup_pop(1);
1156         return NULL;
1157 }
1158 static void *
1159 uxlsnrloop (void * ap)
1160 {
1161         if (cli_init()) {
1162                 condlog(1, "Failed to init uxsock listener");
1163                 return NULL;
1164         }
1165         pthread_cleanup_push(rcu_unregister, NULL);
1166         rcu_register_thread();
1167         set_handler_callback(LIST+PATHS, cli_list_paths);
1168         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1169         set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1170         set_handler_callback(LIST+PATH, cli_list_path);
1171         set_handler_callback(LIST+MAPS, cli_list_maps);
1172         set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1173         set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1174         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1175         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1176         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1177         set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1178         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1179         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1180         set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1181         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1182         set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1183         set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1184         set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1185         set_handler_callback(LIST+CONFIG, cli_list_config);
1186         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1187         set_handler_callback(LIST+DEVICES, cli_list_devices);
1188         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1189         set_handler_callback(ADD+PATH, cli_add_path);
1190         set_handler_callback(DEL+PATH, cli_del_path);
1191         set_handler_callback(ADD+MAP, cli_add_map);
1192         set_handler_callback(DEL+MAP, cli_del_map);
1193         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1194         set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1195         set_handler_callback(SUSPEND+MAP, cli_suspend);
1196         set_handler_callback(RESUME+MAP, cli_resume);
1197         set_handler_callback(RESIZE+MAP, cli_resize);
1198         set_handler_callback(RELOAD+MAP, cli_reload);
1199         set_handler_callback(RESET+MAP, cli_reassign);
1200         set_handler_callback(REINSTATE+PATH, cli_reinstate);
1201         set_handler_callback(FAIL+PATH, cli_fail);
1202         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1203         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1204         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1205         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1206         set_unlocked_handler_callback(QUIT, cli_quit);
1207         set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1208         set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1209         set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1210         set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1211         set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1212         set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1213
1214         umask(077);
1215         uxsock_listen(&uxsock_trigger, ap);
1216         pthread_cleanup_pop(1);
1217         return NULL;
1218 }
1219
1220 void
1221 exit_daemon (void)
1222 {
1223         post_config_state(DAEMON_SHUTDOWN);
1224 }
1225
1226 static void
1227 fail_path (struct path * pp, int del_active)
1228 {
1229         if (!pp->mpp)
1230                 return;
1231
1232         condlog(2, "checker failed path %s in map %s",
1233                  pp->dev_t, pp->mpp->alias);
1234
1235         dm_fail_path(pp->mpp->alias, pp->dev_t);
1236         if (del_active)
1237                 update_queue_mode_del_path(pp->mpp);
1238 }
1239
1240 /*
1241  * caller must have locked the path list before calling that function
1242  */
1243 static int
1244 reinstate_path (struct path * pp, int add_active)
1245 {
1246         int ret = 0;
1247
1248         if (!pp->mpp)
1249                 return 0;
1250
1251         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1252                 condlog(0, "%s: reinstate failed", pp->dev_t);
1253                 ret = 1;
1254         } else {
1255                 condlog(2, "%s: reinstated", pp->dev_t);
1256                 if (add_active)
1257                         update_queue_mode_add_path(pp->mpp);
1258         }
1259         return ret;
1260 }
1261
1262 static void
1263 enable_group(struct path * pp)
1264 {
1265         struct pathgroup * pgp;
1266
1267         /*
1268          * if path is added through uev_add_path, pgindex can be unset.
1269          * next update_strings() will set it, upon map reload event.
1270          *
1271          * we can safely return here, because upon map reload, all
1272          * PG will be enabled.
1273          */
1274         if (!pp->mpp->pg || !pp->pgindex)
1275                 return;
1276
1277         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1278
1279         if (pgp->status == PGSTATE_DISABLED) {
1280                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1281                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1282         }
1283 }
1284
1285 static void
1286 mpvec_garbage_collector (struct vectors * vecs)
1287 {
1288         struct multipath * mpp;
1289         unsigned int i;
1290
1291         if (!vecs->mpvec)
1292                 return;
1293
1294         vector_foreach_slot (vecs->mpvec, mpp, i) {
1295                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1296                         condlog(2, "%s: remove dead map", mpp->alias);
1297                         remove_map_and_stop_waiter(mpp, vecs, 1);
1298                         i--;
1299                 }
1300         }
1301 }
1302
1303 /* This is called after a path has started working again. If the multipath
1304  * device for this path uses the followover failback type, and this is the
1305  * best pathgroup, and this is the first path in the pathgroup to come back
1306  * up, then switch to this pathgroup */
1307 static int
1308 followover_should_failback(struct path * pp)
1309 {
1310         struct pathgroup * pgp;
1311         struct path *pp1;
1312         int i;
1313
1314         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1315             !pp->mpp->pg || !pp->pgindex ||
1316             pp->pgindex != pp->mpp->bestpg)
1317                 return 0;
1318
1319         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1320         vector_foreach_slot(pgp->paths, pp1, i) {
1321                 if (pp1 == pp)
1322                         continue;
1323                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1324                         return 0;
1325         }
1326         return 1;
1327 }
1328
1329 static void
1330 missing_uev_wait_tick(struct vectors *vecs)
1331 {
1332         struct multipath * mpp;
1333         unsigned int i;
1334         int timed_out = 0, delayed_reconfig;
1335         struct config *conf;
1336
1337         vector_foreach_slot (vecs->mpvec, mpp, i) {
1338                 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1339                         timed_out = 1;
1340                         condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1341                         if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1342                                 /* update_map removed map */
1343                                 i--;
1344                                 continue;
1345                         }
1346                         mpp->wait_for_udev = 0;
1347                 }
1348         }
1349
1350         conf = get_multipath_config();
1351         delayed_reconfig = conf->delayed_reconfig;
1352         put_multipath_config(conf);
1353         if (timed_out && delayed_reconfig &&
1354             !need_to_delay_reconfig(vecs)) {
1355                 condlog(2, "reconfigure (delayed)");
1356                 set_config_state(DAEMON_CONFIGURE);
1357         }
1358 }
1359
1360 static void
1361 defered_failback_tick (vector mpvec)
1362 {
1363         struct multipath * mpp;
1364         unsigned int i;
1365
1366         vector_foreach_slot (mpvec, mpp, i) {
1367                 /*
1368                  * deferred failback getting closer
1369                  */
1370                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1371                         mpp->failback_tick--;
1372
1373                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1374                                 switch_pathgroup(mpp);
1375                 }
1376         }
1377 }
1378
1379 static void
1380 retry_count_tick(vector mpvec)
1381 {
1382         struct multipath *mpp;
1383         unsigned int i;
1384
1385         vector_foreach_slot (mpvec, mpp, i) {
1386                 if (mpp->retry_tick > 0) {
1387                         mpp->stat_total_queueing_time++;
1388                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1389                         if(--mpp->retry_tick == 0) {
1390                                 dm_queue_if_no_path(mpp->alias, 0);
1391                                 condlog(2, "%s: Disable queueing", mpp->alias);
1392                         }
1393                 }
1394         }
1395 }
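
/*
 * retry_count_tick() implements the no_path_retry countdown: for every map
 * that is still queueing (retry_tick > 0) it counts the tick as queueing
 * time and, once retry_tick reaches zero, turns queue_if_no_path off so
 * that I/O starts failing instead of queueing indefinitely.
 */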
1396
1397 int update_prio(struct path *pp, int refresh_all)
1398 {
1399         int oldpriority;
1400         struct path *pp1;
1401         struct pathgroup * pgp;
1402         int i, j, changed = 0;
1403         struct config *conf;
1404
1405         if (refresh_all) {
1406                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1407                         vector_foreach_slot (pgp->paths, pp1, j) {
1408                                 oldpriority = pp1->priority;
1409                                 conf = get_multipath_config();
1410                                 pathinfo(pp1, conf, DI_PRIO);
1411                                 put_multipath_config(conf);
1412                                 if (pp1->priority != oldpriority)
1413                                         changed = 1;
1414                         }
1415                 }
1416                 return changed;
1417         }
1418         oldpriority = pp->priority;
1419         conf = get_multipath_config();
1420         pathinfo(pp, conf, DI_PRIO);
1421         put_multipath_config(conf);
1422
1423         if (pp->priority == oldpriority)
1424                 return 0;
1425         return 1;
1426 }
1427
1428 int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
1429 {
1430         if (reload_map(vecs, mpp, refresh, 1))
1431                 return 1;
1432
1433         dm_lib_release();
1434         if (setup_multipath(vecs, mpp) != 0)
1435                 return 1;
1436         sync_map_state(mpp);
1437
1438         return 0;
1439 }
1440
1441 /*
1442  * Returns '1' if the path has been checked, '-1' if it was blacklisted
1443  * and '0' otherwise
1444  */
1445 int
1446 check_path (struct vectors * vecs, struct path * pp, int ticks)
1447 {
1448         int newstate;
1449         int new_path_up = 0;
1450         int chkr_new_path_up = 0;
1451         int add_active;
1452         int disable_reinstate = 0;
1453         int oldchkrstate = pp->chkrstate;
1454         int retrigger_tries, checkint;
1455         struct config *conf;
1456         int ret;
1457
1458         if ((pp->initialized == INIT_OK ||
1459              pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1460                 return 0;
1461
1462         if (pp->tick)
1463                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1464         if (pp->tick)
1465                 return 0; /* don't check this path yet */
1466
1467         conf = get_multipath_config();
1468         retrigger_tries = conf->retrigger_tries;
1469         checkint = conf->checkint;
1470         put_multipath_config(conf);
1471         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1472             pp->retriggers < retrigger_tries) {
1473                 condlog(2, "%s: triggering change event to reinitialize",
1474                         pp->dev);
1475                 pp->initialized = INIT_REQUESTED_UDEV;
1476                 pp->retriggers++;
1477                 sysfs_attr_set_value(pp->udev, "uevent", "change",
1478                                      strlen("change"));
1479                 return 0;
1480         }
1481
1482         /*
1483          * schedule the next check as soon as possible,
1484          * in case we exit abnormally from here
1485          */
1486         pp->tick = checkint;
1487
1488         newstate = path_offline(pp);
1489         /*
1490          * Wait for uevent for removed paths;
1491          * some LLDDs like zfcp keep paths unavailable
1492          * without sending uevents.
1493          */
1494         if (newstate == PATH_REMOVED)
1495                 newstate = PATH_DOWN;
1496
1497         if (newstate == PATH_UP) {
1498                 conf = get_multipath_config();
1499                 newstate = get_state(pp, conf, 1);
1500                 put_multipath_config(conf);
1501         } else
1502                 checker_clear_message(&pp->checker);
1503
1504         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1505                 condlog(2, "%s: unusable path", pp->dev);
1506                 conf = get_multipath_config();
1507                 pathinfo(pp, conf, 0);
1508                 put_multipath_config(conf);
1509                 return 1;
1510         }
1511         if (!pp->mpp) {
1512                 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1513                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
1514                         condlog(2, "%s: add missing path", pp->dev);
1515                         conf = get_multipath_config();
1516                         ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
1517                         if (ret == PATHINFO_OK) {
1518                                 ev_add_path(pp, vecs);
1519                                 pp->tick = 1;
1520                         } else if (ret == PATHINFO_SKIPPED) {
1521                                 put_multipath_config(conf);
1522                                 return -1;
1523                         }
1524                         put_multipath_config(conf);
1525                 }
1526                 return 0;
1527         }
1528         /*
1529          * Async IO in flight. Keep the previous path state
1530          * and reschedule as soon as possible
1531          */
1532         if (newstate == PATH_PENDING) {
1533                 pp->tick = 1;
1534                 return 0;
1535         }
1536         /*
1537          * Synchronize with kernel state
1538          */
1539         if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
1540                 condlog(1, "%s: Could not synchronize with kernel state",
1541                         pp->dev);
1542                 pp->dmstate = PSTATE_UNDEF;
1543         }
1544         /* if update_multipath_strings orphaned the path, quit early */
1545         if (!pp->mpp)
1546                 return 0;
1547
1548         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1549              pp->wait_checks > 0) {
1550                 if (pp->mpp && pp->mpp->nr_active > 0) {
1551                         pp->state = PATH_DELAYED;
1552                         pp->wait_checks--;
1553                         return 1;
1554                 } else
1555                         pp->wait_checks = 0;
1556         }
1557
1558         /*
1559          * don't reinstate a failed path if it's in stand-by
1560          * and the target supports only implicit tpgs mode.
1561          * this prevents unnecessary i/o by dm on stand-by
1562          * paths when there are no other active paths in the map.
1563          */
1564         disable_reinstate = (newstate == PATH_GHOST &&
1565                             pp->mpp->nr_active == 0 &&
1566                             pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1567
1568         pp->chkrstate = newstate;
1569         if (newstate != pp->state) {
1570                 int oldstate = pp->state;
1571                 pp->state = newstate;
1572
1573                 if (strlen(checker_message(&pp->checker)))
1574                         LOG_MSG(1, checker_message(&pp->checker));
1575
1576                 /*
1577                  * upon state change, reset the checkint
1578                  * to the shortest delay
1579                  */
1580                 conf = get_multipath_config();
1581                 pp->checkint = conf->checkint;
1582                 put_multipath_config(conf);
1583
1584                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY) {
1585                         /*
1586                          * proactively fail path in the DM
1587                          */
1588                         if (oldstate == PATH_UP ||
1589                             oldstate == PATH_GHOST) {
1590                                 fail_path(pp, 1);
1591                                 if (pp->mpp->delay_wait_checks > 0 &&
1592                                     pp->watch_checks > 0) {
1593                                         pp->wait_checks = pp->mpp->delay_wait_checks;
1594                                         pp->watch_checks = 0;
1595                                 }
                        } else
1597                                 fail_path(pp, 0);
1598
1599                         /*
1600                          * cancel scheduled failback
1601                          */
1602                         pp->mpp->failback_tick = 0;
1603
1604                         pp->mpp->stat_path_failures++;
1605                         return 1;
1606                 }
1607
                if (newstate == PATH_UP || newstate == PATH_GHOST) {
                        if (pp->mpp && pp->mpp->prflag) {
                                /*
                                 * Check Persistent Reservation.
                                 */
                                condlog(2, "%s: checking persistent reservation "
                                        "registration", pp->dev);
                                mpath_pr_event_handle(pp);
                        }
                }
1618
1619                 /*
1620                  * reinstate this path
1621                  */
1622                 if (oldstate != PATH_UP &&
1623                     oldstate != PATH_GHOST) {
1624                         if (pp->mpp->delay_watch_checks > 0)
1625                                 pp->watch_checks = pp->mpp->delay_watch_checks;
1626                         add_active = 1;
1627                 } else {
1628                         if (pp->watch_checks > 0)
1629                                 pp->watch_checks--;
1630                         add_active = 0;
1631                 }
1632                 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1633                         condlog(3, "%s: reload map", pp->dev);
1634                         ev_add_path(pp, vecs);
1635                         pp->tick = 1;
1636                         return 0;
1637                 }
1638                 new_path_up = 1;
1639
1640                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1641                         chkr_new_path_up = 1;
1642
1643                 /*
1644                  * if at least one path is up in a group, and
1645                  * the group is disabled, re-enable it
1646                  */
1647                 if (newstate == PATH_UP)
1648                         enable_group(pp);
1649         }
1650         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1651                 if ((pp->dmstate == PSTATE_FAILED ||
1652                     pp->dmstate == PSTATE_UNDEF) &&
1653                     !disable_reinstate) {
1654                         /* Clear IO errors */
1655                         if (reinstate_path(pp, 0)) {
1656                                 condlog(3, "%s: reload map", pp->dev);
1657                                 ev_add_path(pp, vecs);
1658                                 pp->tick = 1;
1659                                 return 0;
1660                         }
1661                 } else {
1662                         unsigned int max_checkint;
1663                         LOG_MSG(4, checker_message(&pp->checker));
1664                         conf = get_multipath_config();
1665                         max_checkint = conf->max_checkint;
1666                         put_multipath_config(conf);
1667                         if (pp->checkint != max_checkint) {
1668                                 /*
1669                                  * double the next check delay.
1670                                  * max at conf->max_checkint
1671                                  */
1672                                 if (pp->checkint < (max_checkint / 2))
1673                                         pp->checkint = 2 * pp->checkint;
1674                                 else
1675                                         pp->checkint = max_checkint;
1676
1677                                 condlog(4, "%s: delay next check %is",
1678                                         pp->dev_t, pp->checkint);
1679                         }
1680                         if (pp->watch_checks > 0)
1681                                 pp->watch_checks--;
1682                         pp->tick = pp->checkint;
1683                 }
1684         }
1685         else if (newstate == PATH_DOWN &&
1686                  strlen(checker_message(&pp->checker))) {
1687                 int log_checker_err;
1688
1689                 conf = get_multipath_config();
1690                 log_checker_err = conf->log_checker_err;
1691                 put_multipath_config(conf);
1692                 if (log_checker_err == LOG_CHKR_ERR_ONCE)
1693                         LOG_MSG(3, checker_message(&pp->checker));
1694                 else
1695                         LOG_MSG(2, checker_message(&pp->checker));
1696         }
1697
1698         pp->state = newstate;
1699
1701         if (pp->mpp->wait_for_udev)
1702                 return 1;
1703         /*
1704          * path prio refreshing
1705          */
1706         condlog(4, "path prio refresh");
1707
1708         if (update_prio(pp, new_path_up) &&
1709             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1710              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1711                 update_path_groups(pp->mpp, vecs, !new_path_up);
1712         else if (need_switch_pathgroup(pp->mpp, 0)) {
1713                 if (pp->mpp->pgfailback > 0 &&
1714                     (new_path_up || pp->mpp->failback_tick <= 0))
1715                         pp->mpp->failback_tick =
1716                                 pp->mpp->pgfailback + 1;
1717                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1718                          (chkr_new_path_up && followover_should_failback(pp)))
1719                         switch_pathgroup(pp->mpp);
1720         }
1721         return 1;
1722 }
1723
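/*
 * Main path checker thread: periodically runs check_path() on every
 * known path, handles deferred failback, retry counts and map garbage
 * collection, and paces itself according to the (optionally strict)
 * tick timing.
 */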
1724 static void *
1725 checkerloop (void *ap)
1726 {
1727         struct vectors *vecs;
1728         struct path *pp;
1729         int count = 0;
1730         unsigned int i;
1731         struct itimerval timer_tick_it;
1732         struct timeval last_time;
1733         struct config *conf;
1734
1735         pthread_cleanup_push(rcu_unregister, NULL);
1736         rcu_register_thread();
1737         mlockall(MCL_CURRENT | MCL_FUTURE);
1738         vecs = (struct vectors *)ap;
1739         condlog(2, "path checkers start up");
1740
1741         /*
1742          * init the path check interval
1743          */
1744         vector_foreach_slot (vecs->pathvec, pp, i) {
1745                 conf = get_multipath_config();
1746                 pp->checkint = conf->checkint;
1747                 put_multipath_config(conf);
1748         }
1749
1750         /* Tweak start time for initial path check */
1751         if (gettimeofday(&last_time, NULL) != 0)
1752                 last_time.tv_sec = 0;
1753         else
1754                 last_time.tv_sec -= 1;
1755
1756         while (1) {
1757                 struct timeval diff_time, start_time, end_time;
1758                 int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
1759                 sigset_t mask;
1760
1761                 if (gettimeofday(&start_time, NULL) != 0)
1762                         start_time.tv_sec = 0;
1763                 if (start_time.tv_sec && last_time.tv_sec) {
1764                         timersub(&start_time, &last_time, &diff_time);
1765                         condlog(4, "tick (%lu.%06lu secs)",
1766                                 diff_time.tv_sec, diff_time.tv_usec);
1767                         last_time.tv_sec = start_time.tv_sec;
1768                         last_time.tv_usec = start_time.tv_usec;
1769                         ticks = diff_time.tv_sec;
1770                 } else {
1771                         ticks = 1;
1772                         condlog(4, "tick (%d ticks)", ticks);
1773                 }
1774 #ifdef USE_SYSTEMD
1775                 if (use_watchdog)
1776                         sd_notify(0, "WATCHDOG=1");
1777 #endif
1778                 rc = set_config_state(DAEMON_RUNNING);
1779                 if (rc == ETIMEDOUT) {
1780                         condlog(4, "timeout waiting for DAEMON_IDLE");
1781                         continue;
1782                 }
1783                 if (vecs->pathvec) {
1784                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1785                         lock(vecs->lock);
1786                         pthread_testcancel();
1787                         vector_foreach_slot (vecs->pathvec, pp, i) {
1788                                 rc = check_path(vecs, pp, ticks);
1789                                 if (rc < 0) {
1790                                         vector_del_slot(vecs->pathvec, i);
1791                                         free_path(pp);
1792                                         i--;
1793                                 } else
1794                                         num_paths += rc;
1795                         }
1796                         lock_cleanup_pop(vecs->lock);
1797                 }
1798                 if (vecs->mpvec) {
1799                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1800                         lock(vecs->lock);
1801                         pthread_testcancel();
1802                         defered_failback_tick(vecs->mpvec);
1803                         retry_count_tick(vecs->mpvec);
1804                         missing_uev_wait_tick(vecs);
1805                         lock_cleanup_pop(vecs->lock);
1806                 }
1807                 if (count)
1808                         count--;
1809                 else {
1810                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1811                         lock(vecs->lock);
1812                         pthread_testcancel();
1813                         condlog(4, "map garbage collection");
1814                         mpvec_garbage_collector(vecs);
1815                         count = MAPGCINT;
1816                         lock_cleanup_pop(vecs->lock);
1817                 }
1818
1819                 diff_time.tv_usec = 0;
1820                 if (start_time.tv_sec &&
1821                     gettimeofday(&end_time, NULL) == 0) {
1822                         timersub(&end_time, &start_time, &diff_time);
1823                         if (num_paths) {
1824                                 unsigned int max_checkint;
1825
1826                                 condlog(3, "checked %d path%s in %lu.%06lu secs",
1827                                         num_paths, num_paths > 1 ? "s" : "",
1828                                         diff_time.tv_sec, diff_time.tv_usec);
1829                                 conf = get_multipath_config();
1830                                 max_checkint = conf->max_checkint;
1831                                 put_multipath_config(conf);
1832                                 if (diff_time.tv_sec > max_checkint)
1833                                         condlog(1, "path checkers took longer "
1834                                                 "than %lu seconds, consider "
1835                                                 "increasing max_polling_interval",
1836                                                 diff_time.tv_sec);
1837                         }
1838                 }
1839
1840                 post_config_state(DAEMON_IDLE);
1841                 conf = get_multipath_config();
1842                 strict_timing = conf->strict_timing;
1843                 put_multipath_config(conf);
1844                 if (!strict_timing)
1845                         sleep(1);
1846                 else {
1847                         timer_tick_it.it_interval.tv_sec = 0;
1848                         timer_tick_it.it_interval.tv_usec = 0;
1849                         if (diff_time.tv_usec) {
1850                                 timer_tick_it.it_value.tv_sec = 0;
1851                                 timer_tick_it.it_value.tv_usec =
1852                                         (unsigned long)1000000 - diff_time.tv_usec;
1853                         } else {
1854                                 timer_tick_it.it_value.tv_sec = 1;
1855                                 timer_tick_it.it_value.tv_usec = 0;
1856                         }
1857                         setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1858
1859                         sigemptyset(&mask);
1860                         sigaddset(&mask, SIGALRM);
1861                         condlog(3, "waiting for %lu.%06lu secs",
1862                                 timer_tick_it.it_value.tv_sec,
1863                                 timer_tick_it.it_value.tv_usec);
1864                         if (sigwait(&mask, &signo) != 0) {
1865                                 condlog(3, "sigwait failed with error %d",
1866                                         errno);
1867                                 conf = get_multipath_config();
1868                                 conf->strict_timing = 0;
1869                                 put_multipath_config(conf);
1870                                 break;
1871                         }
1872                 }
1873         }
1874         pthread_cleanup_pop(1);
1875         return NULL;
1876 }
1877
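/*
 * Full (re)discovery: probe paths from sysfs and maps from device-mapper,
 * coalesce them into a fresh multipath vector, push changed maps into the
 * kernel and optionally start the dm event waiter threads.
 */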
1878 int
1879 configure (struct vectors * vecs, int start_waiters)
1880 {
1881         struct multipath * mpp;
1882         struct path * pp;
1883         vector mpvec;
1884         int i, ret;
1885         struct config *conf;
1886
1887         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1888                 return 1;
1889
1890         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1891                 return 1;
1892
1893         if (!(mpvec = vector_alloc()))
1894                 return 1;
1895
1896         /*
1897          * probe for current path (from sysfs) and map (from dm) sets
1898          */
1899         ret = path_discovery(vecs->pathvec, DI_ALL);
1900         if (ret < 0)
1901                 return 1;
1902
1903         vector_foreach_slot (vecs->pathvec, pp, i){
1904                 conf = get_multipath_config();
1905                 if (filter_path(conf, pp) > 0){
1906                         vector_del_slot(vecs->pathvec, i);
1907                         free_path(pp);
1908                         i--;
1909                 }
1910                 else
1911                         pp->checkint = conf->checkint;
1912                 put_multipath_config(conf);
1913         }
1914         if (map_discovery(vecs))
1915                 return 1;
1916
1917         /*
1918          * create new set of maps & push changed ones into dm
1919          */
1920         if (coalesce_paths(vecs, mpvec, NULL, 1, CMD_NONE))
1921                 return 1;
1922
1923         /*
1924          * may need to remove some maps which are no longer relevant
1925          * e.g., due to blacklist changes in conf file
1926          */
1927         if (coalesce_maps(vecs, mpvec))
1928                 return 1;
1929
1930         dm_lib_release();
1931
1932         sync_maps_state(mpvec);
1933         vector_foreach_slot(mpvec, mpp, i){
1934                 remember_wwid(mpp->wwid);
1935                 update_map_pr(mpp);
1936         }
1937
1938         /*
1939          * purge dm of old maps
1940          */
1941         remove_maps(vecs);
1942
1943         /*
1944          * save new set of maps formed by considering current path state
1945          */
1946         vector_free(vecs->mpvec);
1947         vecs->mpvec = mpvec;
1948
1949         /*
1950          * start dm event waiter threads for these new maps
1951          */
1952         vector_foreach_slot(vecs->mpvec, mpp, i) {
1953                 if (setup_multipath(vecs, mpp))
1954                         return 1;
1955                 if (start_waiters)
1956                         if (start_waiter_thread(mpp, vecs))
1957                                 return 1;
1958         }
1959         return 0;
1960 }
1961
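/*
 * Reconfiguration must be postponed while any map is still waiting
 * for udev to settle (wait_for_udev set).
 */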
1962 int
1963 need_to_delay_reconfig(struct vectors * vecs)
1964 {
1965         struct multipath *mpp;
1966         int i;
1967
1968         if (!VECTOR_SIZE(vecs->mpvec))
1969                 return 0;
1970
1971         vector_foreach_slot(vecs->mpvec, mpp, i) {
1972                 if (mpp->wait_for_udev)
1973                         return 1;
1974         }
1975         return 0;
1976 }
1977
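/* RCU callback: free an old struct config once all readers are done. */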
1978 void rcu_free_config(struct rcu_head *head)
1979 {
1980         struct config *conf = container_of(head, struct config, rcu);
1981
1982         free_config(conf);
1983 }
1984
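/*
 * Reload DEFAULT_CONFIGFILE, drop the path and map vectors built with
 * the previous configuration, publish the new config via RCU and run a
 * full configure() pass.
 */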
1985 int
1986 reconfigure (struct vectors * vecs)
1987 {
1988         struct config * old, *conf;
1989
1990         conf = load_config(DEFAULT_CONFIGFILE);
1991         if (!conf)
1992                 return 1;
1993
1994         /*
1995          * free old map and path vectors ... they use old conf state
1996          */
1997         if (VECTOR_SIZE(vecs->mpvec))
1998                 remove_maps_and_stop_waiters(vecs);
1999
2000         free_pathvec(vecs->pathvec, FREE_PATHS);
2001         vecs->pathvec = NULL;
2002
2003         /* Re-read any timezone changes */
2004         tzset();
2005
2006         dm_drv_version(conf->version, TGT_MPATH);
2007         if (verbosity)
2008                 conf->verbosity = verbosity;
2009         if (bindings_read_only)
2010                 conf->bindings_read_only = bindings_read_only;
2011         if (ignore_new_devs)
2012                 conf->ignore_new_devs = ignore_new_devs;
2013         uxsock_timeout = conf->uxsock_timeout;
2014
2015         old = rcu_dereference(multipath_conf);
2016         rcu_assign_pointer(multipath_conf, conf);
2017         call_rcu(&old->rcu, rcu_free_config);
2018
        configure(vecs, 1);

        return 0;
2023 }
2024
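/* Allocate the global vectors structure and its protecting mutex. */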
2025 static struct vectors *
2026 init_vecs (void)
2027 {
2028         struct vectors * vecs;
2029
2030         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2031
2032         if (!vecs)
2033                 return NULL;
2034
2035         vecs->lock.mutex =
2036                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
2037
2038         if (!vecs->lock.mutex)
2039                 goto out;
2040
2041         pthread_mutex_init(vecs->lock.mutex, NULL);
2042         vecs->lock.depth = 0;
2043
2044         return vecs;
2045
2046 out:
2047         FREE(vecs);
2048         condlog(0, "failed to init paths");
2049         return NULL;
2050 }
2051
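/* sigaction() wrapper: returns the previous handler, SIG_ERR on failure. */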
2052 static void *
2053 signal_set(int signo, void (*func) (int))
2054 {
2055         int r;
2056         struct sigaction sig;
2057         struct sigaction osig;
2058
2059         sig.sa_handler = func;
2060         sigemptyset(&sig.sa_mask);
2061         sig.sa_flags = 0;
2062
2063         r = sigaction(signo, &sig, &osig);
2064
2065         if (r < 0)
2066                 return (SIG_ERR);
2067         else
2068                 return (osig.sa_handler);
2069 }
2070
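/*
 * Act on flags raised by the signal handlers: SIGHUP triggers a
 * reconfigure, SIGUSR1 resets the log queue.
 */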
2071 void
2072 handle_signals(void)
2073 {
2074         if (reconfig_sig) {
2075                 condlog(2, "reconfigure (signal)");
2076                 set_config_state(DAEMON_CONFIGURE);
2077         }
2078         if (log_reset_sig) {
2079                 condlog(2, "reset log (signal)");
2080                 pthread_mutex_lock(&logq_lock);
2081                 log_reset("multipathd");
2082                 pthread_mutex_unlock(&logq_lock);
2083         }
2084         reconfig_sig = 0;
2085         log_reset_sig = 0;
2086 }
2087
2088 static void
2089 sighup (int sig)
2090 {
2091         reconfig_sig = 1;
2092 }
2093
2094 static void
2095 sigend (int sig)
2096 {
2097         exit_daemon();
2098 }
2099
2100 static void
2101 sigusr1 (int sig)
2102 {
2103         log_reset_sig = 1;
2104 }
2105
2106 static void
2107 sigusr2 (int sig)
2108 {
2109         condlog(3, "SIGUSR2 received");
2110 }
2111
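/*
 * Block the signals the daemon handles itself, install the
 * corresponding handlers and ignore SIGPIPE.
 */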
2112 static void
2113 signal_init(void)
2114 {
2115         sigset_t set;
2116
2117         sigemptyset(&set);
2118         sigaddset(&set, SIGHUP);
2119         sigaddset(&set, SIGUSR1);
2120         sigaddset(&set, SIGUSR2);
2121         sigaddset(&set, SIGALRM);
2122         pthread_sigmask(SIG_BLOCK, &set, NULL);
2123
2124         signal_set(SIGHUP, sighup);
2125         signal_set(SIGUSR1, sigusr1);
2126         signal_set(SIGUSR2, sigusr2);
2127         signal_set(SIGINT, sigend);
2128         signal_set(SIGTERM, sigend);
2129         signal(SIGPIPE, SIG_IGN);
2130 }
2131
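/* Request SCHED_RR at the highest priority; failure is only logged. */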
2132 static void
2133 setscheduler (void)
2134 {
2135         int res;
2136         static struct sched_param sched_param = {
2137                 .sched_priority = 99
2138         };
2139
2140         res = sched_setscheduler (0, SCHED_RR, &sched_param);
2141
2142         if (res == -1)
2143                 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
2144         return;
2145 }
2146
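/*
 * Exempt multipathd from the OOM killer by writing the minimum score to
 * /proc/self/oom_score_adj (falling back to the legacy oom_adj file),
 * unless systemd already provided OOMScoreAdjust.
 */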
2147 static void
2148 set_oom_adj (void)
2149 {
2150 #ifdef OOM_SCORE_ADJ_MIN
2151         int retry = 1;
2152         char *file = "/proc/self/oom_score_adj";
2153         int score = OOM_SCORE_ADJ_MIN;
2154 #else
2155         int retry = 0;
2156         char *file = "/proc/self/oom_adj";
2157         int score = OOM_ADJUST_MIN;
2158 #endif
2159         FILE *fp;
2160         struct stat st;
2161         char *envp;
2162
2163         envp = getenv("OOMScoreAdjust");
2164         if (envp) {
2165                 condlog(3, "Using systemd provided OOMScoreAdjust");
2166                 return;
2167         }
2168         do {
2169                 if (stat(file, &st) == 0){
2170                         fp = fopen(file, "w");
2171                         if (!fp) {
2172                                 condlog(0, "couldn't fopen %s : %s", file,
2173                                         strerror(errno));
2174                                 return;
2175                         }
2176                         fprintf(fp, "%i", score);
2177                         fclose(fp);
2178                         return;
2179                 }
2180                 if (errno != ENOENT) {
2181                         condlog(0, "couldn't stat %s : %s", file,
2182                                 strerror(errno));
2183                         return;
2184                 }
2185 #ifdef OOM_ADJUST_MIN
2186                 file = "/proc/self/oom_adj";
2187                 score = OOM_ADJUST_MIN;
2188 #else
2189                 retry = 0;
2190 #endif
2191         } while (retry--);
2192         condlog(0, "couldn't adjust oom score");
2193 }
2194
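/*
 * Daemon body, invoked from main() either directly (foreground mode) or
 * in the daemonized child: load the configuration, start the logging,
 * uevent, CLI listener and checker threads, wait for configure/shutdown
 * state changes, and tear everything down on exit.
 */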
2195 static int
2196 child (void * param)
2197 {
2198         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2199         pthread_attr_t log_attr, misc_attr, uevent_attr;
2200         struct vectors * vecs;
2201         struct multipath * mpp;
2202         int i;
2203 #ifdef USE_SYSTEMD
2204         unsigned long checkint;
2205 #endif
2206         int rc;
2207         int pid_fd = -1;
2208         struct config *conf;
2209         char *envp;
2210
2211         mlockall(MCL_CURRENT | MCL_FUTURE);
2212         signal_init();
2213         rcu_init();
2214
2215         setup_thread_attr(&misc_attr, 64 * 1024, 1);
2216         setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 1);
2217         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2218
2219         if (logsink == 1) {
2220                 setup_thread_attr(&log_attr, 64 * 1024, 0);
2221                 log_thread_start(&log_attr);
2222                 pthread_attr_destroy(&log_attr);
2223         }
2224         pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2225         if (pid_fd < 0) {
2226                 condlog(1, "failed to create pidfile");
2227                 if (logsink == 1)
2228                         log_thread_stop();
2229                 exit(1);
2230         }
2231
2232         post_config_state(DAEMON_START);
2233
2234         condlog(2, "--------start up--------");
2235         condlog(2, "read " DEFAULT_CONFIGFILE);
2236
2237         conf = load_config(DEFAULT_CONFIGFILE);
2238         if (!conf)
2239                 goto failed;
2240
2241         if (verbosity)
2242                 conf->verbosity = verbosity;
2243         if (bindings_read_only)
2244                 conf->bindings_read_only = bindings_read_only;
2245         if (ignore_new_devs)
2246                 conf->ignore_new_devs = ignore_new_devs;
2247         uxsock_timeout = conf->uxsock_timeout;
2248         multipath_conf = conf;
2249         dm_init(conf->verbosity);
2250         dm_drv_version(conf->version, TGT_MPATH);
2251         if (init_checkers(conf->multipath_dir)) {
2252                 condlog(0, "failed to initialize checkers");
2253                 goto failed;
2254         }
2255         if (init_prio(conf->multipath_dir)) {
2256                 condlog(0, "failed to initialize prioritizers");
2257                 goto failed;
2258         }
2259
2260         setlogmask(LOG_UPTO(conf->verbosity + 3));
2261
2262         envp = getenv("LimitNOFILE");
2263
2264         if (envp) {
2265                 condlog(2,"Using systemd provided open fds limit of %s", envp);
2266         } else if (conf->max_fds) {
2267                 struct rlimit fd_limit;
2268
2269                 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2270                         condlog(0, "can't get open fds limit: %s",
2271                                 strerror(errno));
2272                         fd_limit.rlim_cur = 0;
2273                         fd_limit.rlim_max = 0;
2274                 }
2275                 if (fd_limit.rlim_cur < conf->max_fds) {
2276                         fd_limit.rlim_cur = conf->max_fds;
2277                         if (fd_limit.rlim_max < conf->max_fds)
2278                                 fd_limit.rlim_max = conf->max_fds;
2279                         if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2280                                 condlog(0, "can't set open fds limit to "
2281                                         "%lu/%lu : %s",
2282                                         fd_limit.rlim_cur, fd_limit.rlim_max,
2283                                         strerror(errno));
2284                         } else {
2285                                 condlog(3, "set open fds limit to %lu/%lu",
2286                                         fd_limit.rlim_cur, fd_limit.rlim_max);
2287                         }
2288                 }
2289
2290         }
2291
2292         vecs = gvecs = init_vecs();
2293         if (!vecs)
2294                 goto failed;
2295
2296         setscheduler();
2297         set_oom_adj();
2298
2299         dm_udev_set_sync_support(0);
2300 #ifdef USE_SYSTEMD
2301         envp = getenv("WATCHDOG_USEC");
2302         if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2303                 /* Value is in microseconds */
2304                 conf->max_checkint = checkint / 1000000;
2305                 /* Rescale checkint */
2306                 if (conf->checkint > conf->max_checkint)
2307                         conf->checkint = conf->max_checkint;
2308                 else
2309                         conf->checkint = conf->max_checkint / 4;
2310                 condlog(3, "enabling watchdog, interval %d max %d",
2311                         conf->checkint, conf->max_checkint);
2312                 use_watchdog = conf->checkint;
2313         }
2314 #endif
2315         /*
2316          * Signal start of configuration
2317          */
2318         post_config_state(DAEMON_CONFIGURE);
2319
2320         /*
2321          * Start uevent listener early to catch events
2322          */
2323         if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2324                 condlog(0, "failed to create uevent thread: %d", rc);
2325                 goto failed;
2326         }
2327         pthread_attr_destroy(&uevent_attr);
2328         if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2329                 condlog(0, "failed to create cli listener: %d", rc);
2330                 goto failed;
2331         }
2332
2333         /*
2334          * start threads
2335          */
2336         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2337                 condlog(0,"failed to create checker loop thread: %d", rc);
2338                 goto failed;
2339         }
2340         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2341                 condlog(0, "failed to create uevent dispatcher: %d", rc);
2342                 goto failed;
2343         }
2344         pthread_attr_destroy(&misc_attr);
2345
2346 #ifdef USE_SYSTEMD
2347         sd_notify(0, "READY=1");
2348 #endif
2349
2350         while (running_state != DAEMON_SHUTDOWN) {
2351                 pthread_cleanup_push(config_cleanup, NULL);
2352                 pthread_mutex_lock(&config_lock);
2353                 if (running_state != DAEMON_CONFIGURE &&
2354                     running_state != DAEMON_SHUTDOWN) {
2355                         pthread_cond_wait(&config_cond, &config_lock);
2356                 }
2357                 pthread_cleanup_pop(1);
2358                 if (running_state == DAEMON_CONFIGURE) {
2359                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
2360                         lock(vecs->lock);
2361                         pthread_testcancel();
2362                         if (!need_to_delay_reconfig(vecs)) {
2363                                 reconfigure(vecs);
2364                         } else {
2365                                 conf->delayed_reconfig = 1;
2366                         }
2367                         lock_cleanup_pop(vecs->lock);
2368                         post_config_state(DAEMON_IDLE);
2369                 }
2370         }
2371
2372         lock(vecs->lock);
2373         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2374                 vector_foreach_slot(vecs->mpvec, mpp, i)
2375                         dm_queue_if_no_path(mpp->alias, 0);
2376         remove_maps_and_stop_waiters(vecs);
2377         unlock(vecs->lock);
2378
2379         pthread_cancel(check_thr);
2380         pthread_cancel(uevent_thr);
2381         pthread_cancel(uxlsnr_thr);
2382         pthread_cancel(uevq_thr);
2383
2384         lock(vecs->lock);
2385         free_pathvec(vecs->pathvec, FREE_PATHS);
2386         vecs->pathvec = NULL;
2387         unlock(vecs->lock);
2388         /* Now all the waitevent threads will start rushing in. */
2389         while (vecs->lock.depth > 0) {
2390                 sleep (1); /* This is weak. */
                condlog(3, "Have %d wait event checker threads to de-alloc,"
                        " waiting...", vecs->lock.depth);
2393         }
2394         pthread_mutex_destroy(vecs->lock.mutex);
2395         FREE(vecs->lock.mutex);
2396         vecs->lock.depth = 0;
2397         vecs->lock.mutex = NULL;
2398         FREE(vecs);
2399         vecs = NULL;
2400
2401         cleanup_checkers();
2402         cleanup_prio();
2403
2404         dm_lib_release();
2405         dm_lib_exit();
2406
2407         /* We're done here */
2408         condlog(3, "unlink pidfile");
2409         unlink(DEFAULT_PIDFILE);
2410
2411         condlog(2, "--------shut down-------");
2412
2413         if (logsink == 1)
2414                 log_thread_stop();
2415
2416         /*
2417          * Freeing config must be done after condlog() and dm_lib_exit(),
2418          * because logging functions like dlog() and dm_write_log()
2419          * reference the config.
2420          */
2421         free_config(conf);
2422         conf = NULL;
2423         udev_unref(udev);
2424         udev = NULL;
2425         pthread_attr_destroy(&waiter_attr);
2426 #ifdef _DEBUG_
2427         dbg_free_final(NULL);
2428 #endif
2429
2430 #ifdef USE_SYSTEMD
2431         sd_notify(0, "ERRNO=0");
2432 #endif
2433         exit(0);
2434
2435 failed:
2436 #ifdef USE_SYSTEMD
2437         sd_notify(0, "ERRNO=1");
2438 #endif
2439         if (pid_fd >= 0)
2440                 close(pid_fd);
2441         exit(1);
2442 }
2443
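/*
 * Classic double-fork daemonization: detach from the controlling
 * terminal, chdir to / and redirect stdin/stdout/stderr to /dev/null.
 * Returns 0 in the daemon, the child's pid in the parent, -1 on error.
 */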
2444 static int
2445 daemonize(void)
2446 {
2447         int pid;
2448         int dev_null_fd;
2449
        if ((pid = fork()) < 0) {
                fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
                return -1;
        }
        else if (pid != 0)
                return pid;

        setsid();

        if ((pid = fork()) < 0)
                fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
        else if (pid != 0)
                _exit(0);
2463
2464         if (chdir("/") < 0)
2465                 fprintf(stderr, "cannot chdir to '/', continuing\n");
2466
2467         dev_null_fd = open("/dev/null", O_RDWR);
2468         if (dev_null_fd < 0){
2469                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
2470                         strerror(errno));
2471                 _exit(0);
2472         }
2473
2474         close(STDIN_FILENO);
2475         if (dup(dev_null_fd) < 0) {
2476                 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
2477                         strerror(errno));
2478                 _exit(0);
2479         }
2480         close(STDOUT_FILENO);
2481         if (dup(dev_null_fd) < 0) {
2482                 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
2483                         strerror(errno));
2484                 _exit(0);
2485         }
2486         close(STDERR_FILENO);
2487         if (dup(dev_null_fd) < 0) {
2488                 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
2489                         strerror(errno));
2490                 _exit(0);
2491         }
2492         close(dev_null_fd);
2493         daemon_pid = getpid();
2494         return 0;
2495 }
2496
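/*
 * Entry point: parse command line options, handle one-shot client mode
 * (-k or trailing command words, passed to uxclnt), then daemonize
 * unless -d was given and hand control to child().
 */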
2497 int
2498 main (int argc, char *argv[])
2499 {
2500         extern char *optarg;
2501         extern int optind;
2502         int arg;
2503         int err;
2504         int foreground = 0;
2505         struct config *conf;
2506
2507         logsink = 1;
2508
2509         if (getuid() != 0) {
2510                 fprintf(stderr, "need to be root\n");
2511                 exit(1);
2512         }
2513
2514         /* make sure we don't lock any path */
2515         if (chdir("/") < 0)
2516                 fprintf(stderr, "can't chdir to root directory : %s\n",
2517                         strerror(errno));
2518         umask(umask(077) | 022);
2519
2520         udev = udev_new();
2521
2522         while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2523         switch(arg) {
2524                 case 'd':
2525                         foreground = 1;
2526                         if (logsink > 0)
2527                                 logsink = 0;
2528                         //debug=1; /* ### comment me out ### */
2529                         break;
2530                 case 'v':
                        if (!isdigit(optarg[0]))
2533                                 exit(1);
2534
2535                         verbosity = atoi(optarg);
2536                         break;
2537                 case 's':
2538                         logsink = -1;
2539                         break;
2540                 case 'k':
2541                         conf = load_config(DEFAULT_CONFIGFILE);
2542                         if (!conf)
2543                                 exit(1);
2544                         if (verbosity)
2545                                 conf->verbosity = verbosity;
2546                         uxclnt(optarg, uxsock_timeout + 100);
2547                         exit(0);
2548                 case 'B':
2549                         bindings_read_only = 1;
2550                         break;
2551                 case 'n':
2552                         ignore_new_devs = 1;
2553                         break;
2554                 default:
2555                         fprintf(stderr, "Invalid argument '-%c'\n",
2556                                 optopt);
2557                         exit(1);
2558                 }
2559         }
2560         if (optind < argc) {
2561                 char cmd[CMDSIZE];
2562                 char * s = cmd;
2563                 char * c = s;
2564
2565                 conf = load_config(DEFAULT_CONFIGFILE);
2566                 if (!conf)
2567                         exit(1);
2568                 if (verbosity)
2569                         conf->verbosity = verbosity;
2570                 memset(cmd, 0x0, CMDSIZE);
2571                 while (optind < argc) {
2572                         if (strchr(argv[optind], ' '))
2573                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2574                         else
2575                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2576                         optind++;
2577                 }
2578                 c += snprintf(c, s + CMDSIZE - c, "\n");
2579                 uxclnt(s, uxsock_timeout + 100);
2580                 exit(0);
2581         }
2582
2583         if (foreground) {
2584                 if (!isatty(fileno(stdout)))
2585                         setbuf(stdout, NULL);
2586                 err = 0;
2587                 daemon_pid = getpid();
2588         } else
2589                 err = daemonize();
2590
2591         if (err < 0)
2592                 /* error */
2593                 exit(1);
2594         else if (err > 0)
2595                 /* parent dies */
2596                 exit(0);
2597         else
2598                 /* child lives */
2599                 return (child(NULL));
2600 }
2601
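/*
 * Thread worker: read the persistent reservation keys from the path
 * (PRIN READ KEYS), check whether the map's reservation_key is among
 * them and, if so, re-register it on this path with REGISTER AND
 * IGNORE EXISTING KEY.
 */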
2602 void *  mpath_pr_event_handler_fn (void * pathp )
2603 {
2604         struct multipath * mpp;
2605         int i,j, ret, isFound;
2606         struct path * pp = (struct path *)pathp;
2607         unsigned char *keyp;
2608         uint64_t prkey;
2609         struct prout_param_descriptor *param;
2610         struct prin_resp *resp;
2611
2612         mpp = pp->mpp;
2613
2614         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
        if (!resp) {
                condlog(0, "%s: Alloc failed for prin response", pp->dev);
                return NULL;
        }

        ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
        if (ret != MPATH_PR_SUCCESS) {
                condlog(0, "%s: pr in read keys service action failed. Error=%d",
                        pp->dev, ret);
                goto out;
        }

        condlog(3, " event pr=%d addlen=%d",
                resp->prin_descriptor.prin_readkeys.prgeneration,
                resp->prin_descriptor.prin_readkeys.additional_length);

        if (resp->prin_descriptor.prin_readkeys.additional_length == 0) {
                condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
                ret = MPATH_PR_SUCCESS;
                goto out;
        }
2636         prkey = 0;
2637         keyp = (unsigned char *)mpp->reservation_key;
2638         for (j = 0; j < 8; ++j) {
2639                 if (j > 0)
2640                         prkey <<= 8;
2641                 prkey |= *keyp;
2642                 ++keyp;
2643         }
2644         condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ", prkey);
2645
2646         isFound =0;
2647         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2648         {
2649                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
2650                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2651                 if (!memcmp(mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2652                 {
2653                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2654                         isFound =1;
2655                         break;
2656                 }
2657         }
2658         if (!isFound)
2659         {
2660                 condlog(0, "%s: Either device not registered or ", pp->dev);
2661                 condlog(0, "host is not authorised for registration. Skip path");
2662                 ret = MPATH_PR_OTHER;
2663                 goto out;
2664         }
2665
        param = malloc(sizeof(struct prout_param_descriptor));
        if (!param)
                goto out;
        memset(param, 0, sizeof(struct prout_param_descriptor));
2668
2669         for (j = 7; j >= 0; --j) {
2670                 param->sa_key[j] = (prkey & 0xff);
2671                 prkey >>= 8;
2672         }
2673         param->num_transportid = 0;
2674
2675         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
2676
        ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
        if (ret != MPATH_PR_SUCCESS)
                condlog(0, "%s: Reservation registration failed. Error: %d",
                        pp->dev, ret);
        mpp->prflag = 1;
2683
2684         free(param);
2685 out:
2686         free(resp);
2687         return NULL;
2688 }
2689
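/*
 * Spawn mpath_pr_event_handler_fn() for this path and wait for it to
 * finish; a no-op when no reservation_key is configured for the map.
 */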
2690 int mpath_pr_event_handle(struct path *pp)
2691 {
2692         pthread_t thread;
2693         int rc;
2694         pthread_attr_t attr;
2695         struct multipath * mpp;
2696
2697         mpp = pp->mpp;
2698
2699         if (!mpp->reservation_key)
2700                 return -1;
2701
2702         pthread_attr_init(&attr);
2703         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2704
        rc = pthread_create(&thread, &attr, mpath_pr_event_handler_fn, pp);
2706         if (rc) {
2707                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2708                 return -1;
2709         }
2710         pthread_attr_destroy(&attr);
2711         rc = pthread_join(thread, NULL);
2712         return 0;
2713 }