Imported Upstream version 0.6.3
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18 #include <linux/oom.h>
19 #include <libudev.h>
20 #include <urcu.h>
21 #ifdef USE_SYSTEMD
22 #include <systemd/sd-daemon.h>
23 #endif
24 #include <semaphore.h>
25 #include <time.h>
26
27 /*
28  * libcheckers
29  */
30 #include "checkers.h"
31
32 #ifdef USE_SYSTEMD
33 static int use_watchdog;
34 #endif
35
36 int uxsock_timeout;
37
38 /*
39  * libmultipath
40  */
41 #include "parser.h"
42 #include "vector.h"
43 #include "memory.h"
44 #include "config.h"
45 #include "util.h"
46 #include "hwtable.h"
47 #include "defaults.h"
48 #include "structs.h"
49 #include "blacklist.h"
50 #include "structs_vec.h"
51 #include "dmparser.h"
52 #include "devmapper.h"
53 #include "sysfs.h"
54 #include "dict.h"
55 #include "discovery.h"
56 #include "debug.h"
57 #include "propsel.h"
58 #include "uevent.h"
59 #include "switchgroup.h"
60 #include "print.h"
61 #include "configure.h"
62 #include "prio.h"
63 #include "wwids.h"
64 #include "pgpolicies.h"
65 #include "uevent.h"
66 #include "log.h"
67
68 #include "mpath_cmd.h"
69 #include "mpath_persist.h"
70
71 #include "prioritizers/alua_rtpg.h"
72
73 #include "main.h"
74 #include "pidfile.h"
75 #include "uxlsnr.h"
76 #include "uxclnt.h"
77 #include "cli.h"
78 #include "cli_handlers.h"
79 #include "lock.h"
80 #include "waiter.h"
81 #include "wwids.h"
82
83 #define FILE_NAME_SIZE 256
84 #define CMDSIZE 160
85
/*
 * Log "msg" at verbosity "lvl" for the path held in the caller's local
 * variable "pp" (pp is NOT a macro argument; it must be in scope at the
 * expansion site). When pp->offline is set, "path offline" is logged
 * instead of "msg"; otherwise "msg" is logged only if it is non-empty.
 */
#define LOG_MSG(lvl, msg) \
do { \
        if (pp->offline) \
                condlog(lvl, "%s: %s - path offline", pp->mpp->alias, pp->dev); \
        else if (strlen(msg)) \
                condlog(lvl, "%s: %s - %s", pp->mpp->alias, pp->dev, msg); \
} while(0)
93
/* Holds a device name together with a pointer to a multipath map. */
struct mpath_event_param {
        char *devname;
        struct multipath *mpp;
};
99
100 unsigned int mpath_mx_alloc_len;
101
102 int logsink;
103 int verbosity;
104 int bindings_read_only;
105 int ignore_new_devs;
106 enum daemon_status running_state = DAEMON_INIT;
107 pid_t daemon_pid;
108 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
109 pthread_cond_t config_cond = PTHREAD_COND_INITIALIZER;
110
111 /*
112  * global copy of vecs for use in sig handlers
113  */
114 struct vectors * gvecs;
115
116 struct udev * udev;
117
118 struct config *multipath_conf;
119
120 /* Local variables */
121 static volatile sig_atomic_t exit_sig;
122 static volatile sig_atomic_t reconfig_sig;
123 static volatile sig_atomic_t log_reset_sig;
124
125 const char *
126 daemon_status(void)
127 {
128         switch (running_state) {
129         case DAEMON_INIT:
130                 return "init";
131         case DAEMON_START:
132                 return "startup";
133         case DAEMON_CONFIGURE:
134                 return "configure";
135         case DAEMON_IDLE:
136                 return "idle";
137         case DAEMON_RUNNING:
138                 return "running";
139         case DAEMON_SHUTDOWN:
140                 return "shutdown";
141         }
142         return NULL;
143 }
144
145 /*
146  * I love you too, systemd ...
147  */
148 const char *
149 sd_notify_status(void)
150 {
151         switch (running_state) {
152         case DAEMON_INIT:
153                 return "STATUS=init";
154         case DAEMON_START:
155                 return "STATUS=startup";
156         case DAEMON_CONFIGURE:
157                 return "STATUS=configure";
158         case DAEMON_IDLE:
159                 return "STATUS=idle";
160         case DAEMON_RUNNING:
161                 return "STATUS=running";
162         case DAEMON_SHUTDOWN:
163                 return "STATUS=shutdown";
164         }
165         return NULL;
166 }
167
168 static void config_cleanup(void *arg)
169 {
170         pthread_mutex_unlock(&config_lock);
171 }
172
173 void post_config_state(enum daemon_status state)
174 {
175         pthread_mutex_lock(&config_lock);
176         if (state != running_state) {
177                 running_state = state;
178                 pthread_cond_broadcast(&config_cond);
179 #ifdef USE_SYSTEMD
180                 sd_notify(0, sd_notify_status());
181 #endif
182         }
183         pthread_mutex_unlock(&config_lock);
184 }
185
186 int set_config_state(enum daemon_status state)
187 {
188         int rc = 0;
189
190         pthread_cleanup_push(config_cleanup, NULL);
191         pthread_mutex_lock(&config_lock);
192         if (running_state != state) {
193                 if (running_state != DAEMON_IDLE) {
194                         struct timespec ts;
195
196                         clock_gettime(CLOCK_REALTIME, &ts);
197                         ts.tv_sec += 1;
198                         rc = pthread_cond_timedwait(&config_cond,
199                                                     &config_lock, &ts);
200                 }
201                 if (!rc) {
202                         running_state = state;
203                         pthread_cond_broadcast(&config_cond);
204 #ifdef USE_SYSTEMD
205                         sd_notify(0, sd_notify_status());
206 #endif
207                 }
208         }
209         pthread_cleanup_pop(1);
210         return rc;
211 }
212
213 struct config *get_multipath_config(void)
214 {
215         rcu_read_lock();
216         return rcu_dereference(multipath_conf);
217 }
218
/*
 * Leave the RCU read-side section opened by get_multipath_config().
 * "conf" itself is not used.
 */
void put_multipath_config(struct config *conf)
{
        (void)conf;
        rcu_read_unlock();
}
223
224 static int
225 need_switch_pathgroup (struct multipath * mpp, int refresh)
226 {
227         struct pathgroup * pgp;
228         struct path * pp;
229         unsigned int i, j;
230         struct config *conf;
231
232         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
233                 return 0;
234
235         /*
236          * Refresh path priority values
237          */
238         if (refresh) {
239                 vector_foreach_slot (mpp->pg, pgp, i) {
240                         vector_foreach_slot (pgp->paths, pp, j) {
241                                 conf = get_multipath_config();
242                                 pathinfo(pp, conf, DI_PRIO);
243                                 put_multipath_config(conf);
244                         }
245                 }
246         }
247
248         if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
249                 return 0;
250
251         mpp->bestpg = select_path_group(mpp);
252
253         if (mpp->bestpg != mpp->nextpg)
254                 return 1;
255
256         return 0;
257 }
258
259 static void
260 switch_pathgroup (struct multipath * mpp)
261 {
262         mpp->stat_switchgroup++;
263         dm_switchgroup(mpp->alias, mpp->bestpg);
264         condlog(2, "%s: switch to path group #%i",
265                  mpp->alias, mpp->bestpg);
266 }
267
268 static int
269 coalesce_maps(struct vectors *vecs, vector nmpv)
270 {
271         struct multipath * ompp;
272         vector ompv = vecs->mpvec;
273         unsigned int i, reassign_maps;
274         struct config *conf;
275
276         conf = get_multipath_config();
277         reassign_maps = conf->reassign_maps;
278         put_multipath_config(conf);
279         vector_foreach_slot (ompv, ompp, i) {
280                 condlog(3, "%s: coalesce map", ompp->alias);
281                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
282                         /*
283                          * remove all current maps not allowed by the
284                          * current configuration
285                          */
286                         if (dm_flush_map(ompp->alias)) {
287                                 condlog(0, "%s: unable to flush devmap",
288                                         ompp->alias);
289                                 /*
290                                  * may be just because the device is open
291                                  */
292                                 if (setup_multipath(vecs, ompp) != 0) {
293                                         i--;
294                                         continue;
295                                 }
296                                 if (!vector_alloc_slot(nmpv))
297                                         return 1;
298
299                                 vector_set_slot(nmpv, ompp);
300
301                                 vector_del_slot(ompv, i);
302                                 i--;
303                         }
304                         else {
305                                 dm_lib_release();
306                                 condlog(2, "%s devmap removed", ompp->alias);
307                         }
308                 } else if (reassign_maps) {
309                         condlog(3, "%s: Reassign existing device-mapper"
310                                 " devices", ompp->alias);
311                         dm_reassign(ompp->alias);
312                 }
313         }
314         return 0;
315 }
316
317 void
318 sync_map_state(struct multipath *mpp)
319 {
320         struct pathgroup *pgp;
321         struct path *pp;
322         unsigned int i, j;
323
324         if (!mpp->pg)
325                 return;
326
327         vector_foreach_slot (mpp->pg, pgp, i){
328                 vector_foreach_slot (pgp->paths, pp, j){
329                         if (pp->state == PATH_UNCHECKED ||
330                             pp->state == PATH_WILD ||
331                             pp->state == PATH_DELAYED)
332                                 continue;
333                         if ((pp->dmstate == PSTATE_FAILED ||
334                              pp->dmstate == PSTATE_UNDEF) &&
335                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
336                                 dm_reinstate_path(mpp->alias, pp->dev_t);
337                         else if ((pp->dmstate == PSTATE_ACTIVE ||
338                                   pp->dmstate == PSTATE_UNDEF) &&
339                                  (pp->state == PATH_DOWN ||
340                                   pp->state == PATH_SHAKY))
341                                 dm_fail_path(mpp->alias, pp->dev_t);
342                 }
343         }
344 }
345
346 static void
347 sync_maps_state(vector mpvec)
348 {
349         unsigned int i;
350         struct multipath *mpp;
351
352         vector_foreach_slot (mpvec, mpp, i)
353                 sync_map_state(mpp);
354 }
355
356 static int
357 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
358 {
359         int r;
360
361         if (nopaths)
362                 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
363         else
364                 r = dm_flush_map(mpp->alias);
365         /*
366          * clear references to this map before flushing so we can ignore
367          * the spurious uevent we may generate with the dm_flush_map call below
368          */
369         if (r) {
370                 /*
371                  * May not really be an error -- if the map was already flushed
372                  * from the device mapper by dmsetup(8) for instance.
373                  */
374                 if (r == 1)
375                         condlog(0, "%s: can't flush", mpp->alias);
376                 else {
377                         condlog(2, "%s: devmap deferred remove", mpp->alias);
378                         mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
379                 }
380                 return r;
381         }
382         else {
383                 dm_lib_release();
384                 condlog(2, "%s: map flushed", mpp->alias);
385         }
386
387         orphan_paths(vecs->pathvec, mpp);
388         remove_map_and_stop_waiter(mpp, vecs, 1);
389
390         return 0;
391 }
392
393 int
394 update_map (struct multipath *mpp, struct vectors *vecs)
395 {
396         int retries = 3;
397         char params[PARAMS_SIZE] = {0};
398
399 retry:
400         condlog(4, "%s: updating new map", mpp->alias);
401         if (adopt_paths(vecs->pathvec, mpp)) {
402                 condlog(0, "%s: failed to adopt paths for new map update",
403                         mpp->alias);
404                 retries = -1;
405                 goto fail;
406         }
407         verify_paths(mpp, vecs);
408         mpp->flush_on_last_del = FLUSH_UNDEF;
409         mpp->action = ACT_RELOAD;
410
411         if (setup_map(mpp, params, PARAMS_SIZE)) {
412                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
413                 retries = -1;
414                 goto fail;
415         }
416         if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
417                 condlog(0, "%s: map_udate sleep", mpp->alias);
418                 sleep(1);
419                 goto retry;
420         }
421         dm_lib_release();
422
423 fail:
424         if (setup_multipath(vecs, mpp))
425                 return 1;
426
427         sync_map_state(mpp);
428
429         if (retries < 0)
430                 condlog(0, "%s: failed reload in new map update", mpp->alias);
431         return 0;
432 }
433
434 static int
435 uev_add_map (struct uevent * uev, struct vectors * vecs)
436 {
437         char *alias;
438         int major = -1, minor = -1, rc;
439
440         condlog(3, "%s: add map (uevent)", uev->kernel);
441         alias = uevent_get_dm_name(uev);
442         if (!alias) {
443                 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
444                 major = uevent_get_major(uev);
445                 minor = uevent_get_minor(uev);
446                 alias = dm_mapname(major, minor);
447                 if (!alias) {
448                         condlog(2, "%s: mapname not found for %d:%d",
449                                 uev->kernel, major, minor);
450                         return 1;
451                 }
452         }
453         pthread_cleanup_push(cleanup_lock, &vecs->lock);
454         lock(&vecs->lock);
455         pthread_testcancel();
456         rc = ev_add_map(uev->kernel, alias, vecs);
457         lock_cleanup_pop(vecs->lock);
458         FREE(alias);
459         return rc;
460 }
461
462 int
463 ev_add_map (char * dev, char * alias, struct vectors * vecs)
464 {
465         char * refwwid;
466         struct multipath * mpp;
467         int map_present;
468         int r = 1, delayed_reconfig, reassign_maps;
469         struct config *conf;
470
471         map_present = dm_map_present(alias);
472
473         if (map_present && !dm_is_mpath(alias)) {
474                 condlog(4, "%s: not a multipath map", alias);
475                 return 0;
476         }
477
478         mpp = find_mp_by_alias(vecs->mpvec, alias);
479
480         if (mpp) {
481                 if (mpp->wait_for_udev > 1) {
482                         if (update_map(mpp, vecs))
483                                 /* setup multipathd removed the map */
484                                 return 1;
485                 }
486                 conf = get_multipath_config();
487                 delayed_reconfig = conf->delayed_reconfig;
488                 reassign_maps = conf->reassign_maps;
489                 put_multipath_config(conf);
490                 if (mpp->wait_for_udev) {
491                         mpp->wait_for_udev = 0;
492                         if (delayed_reconfig &&
493                             !need_to_delay_reconfig(vecs)) {
494                                 condlog(2, "reconfigure (delayed)");
495                                 set_config_state(DAEMON_CONFIGURE);
496                                 return 0;
497                         }
498                 }
499                 /*
500                  * Not really an error -- we generate our own uevent
501                  * if we create a multipath mapped device as a result
502                  * of uev_add_path
503                  */
504                 if (reassign_maps) {
505                         condlog(3, "%s: Reassign existing device-mapper devices",
506                                 alias);
507                         dm_reassign(alias);
508                 }
509                 return 0;
510         }
511         condlog(2, "%s: adding map", alias);
512
513         /*
514          * now we can register the map
515          */
516         if (map_present) {
517                 if ((mpp = add_map_without_path(vecs, alias))) {
518                         sync_map_state(mpp);
519                         condlog(2, "%s: devmap %s registered", alias, dev);
520                         return 0;
521                 } else {
522                         condlog(2, "%s: uev_add_map failed", dev);
523                         return 1;
524                 }
525         }
526         r = get_refwwid(CMD_NONE, dev, DEV_DEVMAP, vecs->pathvec, &refwwid);
527
528         if (refwwid) {
529                 r = coalesce_paths(vecs, NULL, refwwid, 0, CMD_NONE);
530                 dm_lib_release();
531         }
532
533         if (!r)
534                 condlog(2, "%s: devmap %s added", alias, dev);
535         else if (r == 2)
536                 condlog(2, "%s: uev_add_map %s blacklisted", alias, dev);
537         else
538                 condlog(0, "%s: uev_add_map %s failed", alias, dev);
539
540         FREE(refwwid);
541         return r;
542 }
543
544 static int
545 uev_remove_map (struct uevent * uev, struct vectors * vecs)
546 {
547         char *alias;
548         int minor;
549         struct multipath *mpp;
550
551         condlog(2, "%s: remove map (uevent)", uev->kernel);
552         alias = uevent_get_dm_name(uev);
553         if (!alias) {
554                 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
555                 return 0;
556         }
557         minor = uevent_get_minor(uev);
558
559         pthread_cleanup_push(cleanup_lock, &vecs->lock);
560         lock(&vecs->lock);
561         pthread_testcancel();
562         mpp = find_mp_by_minor(vecs->mpvec, minor);
563
564         if (!mpp) {
565                 condlog(2, "%s: devmap not registered, can't remove",
566                         uev->kernel);
567                 goto out;
568         }
569         if (strcmp(mpp->alias, alias)) {
570                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
571                         mpp->alias, mpp->dmi->minor, minor);
572                 goto out;
573         }
574
575         orphan_paths(vecs->pathvec, mpp);
576         remove_map_and_stop_waiter(mpp, vecs, 1);
577 out:
578         lock_cleanup_pop(vecs->lock);
579         FREE(alias);
580         return 0;
581 }
582
583 /* Called from CLI handler */
584 int
585 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
586 {
587         struct multipath * mpp;
588
589         mpp = find_mp_by_minor(vecs->mpvec, minor);
590
591         if (!mpp) {
592                 condlog(2, "%s: devmap not registered, can't remove",
593                         devname);
594                 return 1;
595         }
596         if (strcmp(mpp->alias, alias)) {
597                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
598                         mpp->alias, mpp->dmi->minor, minor);
599                 return 1;
600         }
601         return flush_map(mpp, vecs, 0);
602 }
603
604 static int
605 uev_add_path (struct uevent *uev, struct vectors * vecs)
606 {
607         struct path *pp;
608         int ret = 0, i;
609         struct config *conf;
610
611         condlog(2, "%s: add path (uevent)", uev->kernel);
612         if (strstr(uev->kernel, "..") != NULL) {
613                 /*
614                  * Don't allow relative device names in the pathvec
615                  */
616                 condlog(0, "%s: path name is invalid", uev->kernel);
617                 return 1;
618         }
619
620         pthread_cleanup_push(cleanup_lock, &vecs->lock);
621         lock(&vecs->lock);
622         pthread_testcancel();
623         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
624         if (pp) {
625                 int r;
626
627                 condlog(0, "%s: spurious uevent, path already in pathvec",
628                         uev->kernel);
629                 if (!pp->mpp && !strlen(pp->wwid)) {
630                         condlog(3, "%s: reinitialize path", uev->kernel);
631                         udev_device_unref(pp->udev);
632                         pp->udev = udev_device_ref(uev->udev);
633                         conf = get_multipath_config();
634                         r = pathinfo(pp, conf,
635                                      DI_ALL | DI_BLACKLIST);
636                         put_multipath_config(conf);
637                         if (r == PATHINFO_OK)
638                                 ret = ev_add_path(pp, vecs);
639                         else if (r == PATHINFO_SKIPPED) {
640                                 condlog(3, "%s: remove blacklisted path",
641                                         uev->kernel);
642                                 i = find_slot(vecs->pathvec, (void *)pp);
643                                 if (i != -1)
644                                         vector_del_slot(vecs->pathvec, i);
645                                 free_path(pp);
646                         } else {
647                                 condlog(0, "%s: failed to reinitialize path",
648                                         uev->kernel);
649                                 ret = 1;
650                         }
651                 }
652         }
653         lock_cleanup_pop(vecs->lock);
654         if (pp)
655                 return ret;
656
657         /*
658          * get path vital state
659          */
660         conf = get_multipath_config();
661         ret = alloc_path_with_pathinfo(conf, uev->udev,
662                                        DI_ALL, &pp);
663         put_multipath_config(conf);
664         if (!pp) {
665                 if (ret == PATHINFO_SKIPPED)
666                         return 0;
667                 condlog(3, "%s: failed to get path info", uev->kernel);
668                 return 1;
669         }
670         pthread_cleanup_push(cleanup_lock, &vecs->lock);
671         lock(&vecs->lock);
672         pthread_testcancel();
673         ret = store_path(vecs->pathvec, pp);
674         if (!ret) {
675                 conf = get_multipath_config();
676                 pp->checkint = conf->checkint;
677                 put_multipath_config(conf);
678                 ret = ev_add_path(pp, vecs);
679         } else {
680                 condlog(0, "%s: failed to store path info, "
681                         "dropping event",
682                         uev->kernel);
683                 free_path(pp);
684                 ret = 1;
685         }
686         lock_cleanup_pop(vecs->lock);
687         return ret;
688 }
689
690 /*
691  * returns:
692  * 0: added
693  * 1: error
694  */
695 int
696 ev_add_path (struct path * pp, struct vectors * vecs)
697 {
698         struct multipath * mpp;
699         char params[PARAMS_SIZE] = {0};
700         int retries = 3;
701         int start_waiter = 0;
702         int ret;
703
704         /*
705          * need path UID to go any further
706          */
707         if (strlen(pp->wwid) == 0) {
708                 condlog(0, "%s: failed to get path uid", pp->dev);
709                 goto fail; /* leave path added to pathvec */
710         }
711         mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
712         if (mpp && mpp->wait_for_udev) {
713                 mpp->wait_for_udev = 2;
714                 orphan_path(pp, "waiting for create to complete");
715                 return 0;
716         }
717
718         pp->mpp = mpp;
719 rescan:
720         if (mpp) {
721                 if (pp->size && mpp->size != pp->size) {
722                         condlog(0, "%s: failed to add new path %s, "
723                                 "device size mismatch",
724                                 mpp->alias, pp->dev);
725                         int i = find_slot(vecs->pathvec, (void *)pp);
726                         if (i != -1)
727                                 vector_del_slot(vecs->pathvec, i);
728                         free_path(pp);
729                         return 1;
730                 }
731
732                 condlog(4,"%s: adopting all paths for path %s",
733                         mpp->alias, pp->dev);
734                 if (adopt_paths(vecs->pathvec, mpp))
735                         goto fail; /* leave path added to pathvec */
736
737                 verify_paths(mpp, vecs);
738                 mpp->flush_on_last_del = FLUSH_UNDEF;
739                 mpp->action = ACT_RELOAD;
740         } else {
741                 if (!should_multipath(pp, vecs->pathvec)) {
742                         orphan_path(pp, "only one path");
743                         return 0;
744                 }
745                 condlog(4,"%s: creating new map", pp->dev);
746                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
747                         mpp->action = ACT_CREATE;
748                         /*
749                          * We don't depend on ACT_CREATE, as domap will
750                          * set it to ACT_NOTHING when complete.
751                          */
752                         start_waiter = 1;
753                 }
754                 if (!start_waiter)
755                         goto fail; /* leave path added to pathvec */
756         }
757
758         /* persistent reservation check*/
759         mpath_pr_event_handle(pp);
760
761         /*
762          * push the map to the device-mapper
763          */
764         if (setup_map(mpp, params, PARAMS_SIZE)) {
765                 condlog(0, "%s: failed to setup map for addition of new "
766                         "path %s", mpp->alias, pp->dev);
767                 goto fail_map;
768         }
769         /*
770          * reload the map for the multipath mapped device
771          */
772 retry:
773         ret = domap(mpp, params, 1);
774         if (ret <= 0) {
775                 if (ret < 0 && retries-- > 0) {
776                         condlog(0, "%s: retry domap for addition of new "
777                                 "path %s", mpp->alias, pp->dev);
778                         sleep(1);
779                         goto retry;
780                 }
781                 condlog(0, "%s: failed in domap for addition of new "
782                         "path %s", mpp->alias, pp->dev);
783                 /*
784                  * deal with asynchronous uevents :((
785                  */
786                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
787                         condlog(0, "%s: ev_add_path sleep", mpp->alias);
788                         sleep(1);
789                         update_mpp_paths(mpp, vecs->pathvec);
790                         goto rescan;
791                 }
792                 else if (mpp->action == ACT_RELOAD)
793                         condlog(0, "%s: giving up reload", mpp->alias);
794                 else
795                         goto fail_map;
796         }
797         dm_lib_release();
798
799         /*
800          * update our state from kernel regardless of create or reload
801          */
802         if (setup_multipath(vecs, mpp))
803                 goto fail; /* if setup_multipath fails, it removes the map */
804
805         sync_map_state(mpp);
806
807         if ((mpp->action == ACT_CREATE ||
808              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
809             start_waiter_thread(mpp, vecs))
810                         goto fail_map;
811
812         if (retries >= 0) {
813                 condlog(2, "%s [%s]: path added to devmap %s",
814                         pp->dev, pp->dev_t, mpp->alias);
815                 return 0;
816         } else
817                 goto fail;
818
819 fail_map:
820         remove_map(mpp, vecs, 1);
821 fail:
822         orphan_path(pp, "failed to add path");
823         return 1;
824 }
825
826 static int
827 uev_remove_path (struct uevent *uev, struct vectors * vecs)
828 {
829         struct path *pp;
830         int ret;
831
832         condlog(2, "%s: remove path (uevent)", uev->kernel);
833         pthread_cleanup_push(cleanup_lock, &vecs->lock);
834         lock(&vecs->lock);
835         pthread_testcancel();
836         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
837         if (pp)
838                 ret = ev_remove_path(pp, vecs);
839         lock_cleanup_pop(vecs->lock);
840         if (!pp) {
841                 /* Not an error; path might have been purged earlier */
842                 condlog(0, "%s: path already removed", uev->kernel);
843                 return 0;
844         }
845         return ret;
846 }
847
848 int
849 ev_remove_path (struct path *pp, struct vectors * vecs)
850 {
851         struct multipath * mpp;
852         int i, retval = 0;
853         char params[PARAMS_SIZE] = {0};
854
855         /*
856          * avoid referring to the map of an orphaned path
857          */
858         if ((mpp = pp->mpp)) {
859                 /*
860                  * transform the mp->pg vector of vectors of paths
861                  * into a mp->params string to feed the device-mapper
862                  */
863                 if (update_mpp_paths(mpp, vecs->pathvec)) {
864                         condlog(0, "%s: failed to update paths",
865                                 mpp->alias);
866                         goto fail;
867                 }
868                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
869                         vector_del_slot(mpp->paths, i);
870
871                 /*
872                  * remove the map IFF removing the last path
873                  */
874                 if (VECTOR_SIZE(mpp->paths) == 0) {
875                         char alias[WWID_SIZE];
876
877                         /*
878                          * flush_map will fail if the device is open
879                          */
880                         strncpy(alias, mpp->alias, WWID_SIZE);
881                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
882                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
883                                 mpp->retry_tick = 0;
884                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
885                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
886                                 dm_queue_if_no_path(mpp->alias, 0);
887                         }
888                         if (!flush_map(mpp, vecs, 1)) {
889                                 condlog(2, "%s: removed map after"
890                                         " removing all paths",
891                                         alias);
892                                 retval = 0;
893                                 goto out;
894                         }
895                         /*
896                          * Not an error, continue
897                          */
898                 }
899
900                 if (setup_map(mpp, params, PARAMS_SIZE)) {
901                         condlog(0, "%s: failed to setup map for"
902                                 " removal of path %s", mpp->alias, pp->dev);
903                         goto fail;
904                 }
905
906                 if (mpp->wait_for_udev) {
907                         mpp->wait_for_udev = 2;
908                         goto out;
909                 }
910
911                 /*
912                  * reload the map
913                  */
914                 mpp->action = ACT_RELOAD;
915                 if (domap(mpp, params, 1) <= 0) {
916                         condlog(0, "%s: failed in domap for "
917                                 "removal of path %s",
918                                 mpp->alias, pp->dev);
919                         retval = 1;
920                 } else {
921                         /*
922                          * update our state from kernel
923                          */
924                         if (setup_multipath(vecs, mpp))
925                                 return 1;
926                         sync_map_state(mpp);
927
928                         condlog(2, "%s [%s]: path removed from map %s",
929                                 pp->dev, pp->dev_t, mpp->alias);
930                 }
931         }
932
933 out:
934         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
935                 vector_del_slot(vecs->pathvec, i);
936
937         free_path(pp);
938
939         return retval;
940
941 fail:
942         remove_map_and_stop_waiter(mpp, vecs, 1);
943         return 1;
944 }
945
946 static int
947 uev_update_path (struct uevent *uev, struct vectors * vecs)
948 {
949         int ro, retval = 0;
950
951         ro = uevent_get_disk_ro(uev);
952
953         if (ro >= 0) {
954                 struct path * pp;
955                 struct multipath *mpp = NULL;
956
957                 condlog(2, "%s: update path write_protect to '%d' (uevent)",
958                         uev->kernel, ro);
959                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
960                 lock(&vecs->lock);
961                 pthread_testcancel();
962                 /*
963                  * pthread_mutex_lock() and pthread_mutex_unlock()
964                  * need to be at the same indentation level, hence
965                  * this slightly convoluted codepath.
966                  */
967                 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
968                 if (pp) {
969                         if (pp->initialized == INIT_REQUESTED_UDEV) {
970                                 retval = 2;
971                         } else {
972                                 mpp = pp->mpp;
973                                 if (mpp && mpp->wait_for_udev) {
974                                         mpp->wait_for_udev = 2;
975                                         mpp = NULL;
976                                         retval = 0;
977                                 }
978                         }
979                         if (mpp) {
980                                 retval = reload_map(vecs, mpp, 0, 1);
981
982                                 condlog(2, "%s: map %s reloaded (retval %d)",
983                                         uev->kernel, mpp->alias, retval);
984                         }
985                 }
986                 lock_cleanup_pop(vecs->lock);
987                 if (!pp) {
988                         condlog(0, "%s: spurious uevent, path not found",
989                                 uev->kernel);
990                         return 1;
991                 }
992                 if (retval == 2)
993                         return uev_add_path(uev, vecs);
994         }
995
996         return retval;
997 }
998
999 static int
1000 map_discovery (struct vectors * vecs)
1001 {
1002         struct multipath * mpp;
1003         unsigned int i;
1004
1005         if (dm_get_maps(vecs->mpvec))
1006                 return 1;
1007
1008         vector_foreach_slot (vecs->mpvec, mpp, i)
1009                 if (setup_multipath(vecs, mpp))
1010                         return 1;
1011
1012         return 0;
1013 }
1014
1015 int
1016 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
1017 {
1018         struct vectors * vecs;
1019         int r;
1020
1021         *reply = NULL;
1022         *len = 0;
1023         vecs = (struct vectors *)trigger_data;
1024
1025         r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1026
1027         if (r > 0) {
1028                 if (r == ETIMEDOUT)
1029                         *reply = STRDUP("timeout\n");
1030                 else
1031                         *reply = STRDUP("fail\n");
1032                 *len = strlen(*reply) + 1;
1033                 r = 1;
1034         }
1035         else if (!r && *len == 0) {
1036                 *reply = STRDUP("ok\n");
1037                 *len = strlen(*reply) + 1;
1038                 r = 0;
1039         }
1040         /* else if (r < 0) leave *reply alone */
1041
1042         return r;
1043 }
1044
/*
 * Decide whether a uevent can be ignored based on its devpath.
 *
 * Only whole block devices are kept. The event is discarded (return 1)
 * when
 *   - devpath contains no "/block/" component,
 *   - nothing but white space follows "/block/", or
 *   - the name after "/block/" is followed by "/<something>", i.e. the
 *     devpath refers to a partition of that device.
 * Otherwise 0 is returned.
 *
 * The device name is examined at its full length, so partitions of
 * devices with names longer than 10 characters are recognised as well.
 */
static int
uev_discard(char * devpath)
{
        static const char block_tag[] = "/block/";
        /* the characters the scanf "%s" conversion treats as separators */
        static const char blanks[] = " \t\n\v\f\r";
        const char *name, *rest;
        size_t name_len;

        /*
         * keep only block devices, discard partitions
         */
        name = strstr(devpath, block_tag);
        if (name == NULL){
                condlog(4, "no /block/ in '%s'", devpath);
                return 1;
        }
        name += sizeof(block_tag) - 1;

        /* no device name at all after "/block/" */
        if (name[strspn(name, blanks)] == '\0')
                goto discard;

        /* "<disk>/<part>": a non-empty name, a slash, and something more */
        name_len = strcspn(name, "/");
        if (name_len > 0 && name[name_len] == '/') {
                rest = name + name_len + 1;
                if (rest[strspn(rest, blanks)] != '\0')
                        goto discard;
        }
        return 0;

discard:
        condlog(4, "discard event on %s", devpath);
        return 1;
}
1066
1067 int
1068 uev_trigger (struct uevent * uev, void * trigger_data)
1069 {
1070         int r = 0;
1071         struct vectors * vecs;
1072         struct config *conf;
1073
1074         vecs = (struct vectors *)trigger_data;
1075
1076         if (uev_discard(uev->devpath))
1077                 return 0;
1078
1079         pthread_cleanup_push(config_cleanup, NULL);
1080         pthread_mutex_lock(&config_lock);
1081         if (running_state != DAEMON_IDLE &&
1082             running_state != DAEMON_RUNNING)
1083                 pthread_cond_wait(&config_cond, &config_lock);
1084         pthread_cleanup_pop(1);
1085
1086         if (running_state == DAEMON_SHUTDOWN)
1087                 return 0;
1088
1089         /*
1090          * device map event
1091          * Add events are ignored here as the tables
1092          * are not fully initialised then.
1093          */
1094         if (!strncmp(uev->kernel, "dm-", 3)) {
1095                 if (!strncmp(uev->action, "change", 6)) {
1096                         r = uev_add_map(uev, vecs);
1097                         goto out;
1098                 }
1099                 if (!strncmp(uev->action, "remove", 6)) {
1100                         r = uev_remove_map(uev, vecs);
1101                         goto out;
1102                 }
1103                 goto out;
1104         }
1105
1106         /*
1107          * path add/remove event
1108          */
1109         conf = get_multipath_config();
1110         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
1111                            uev->kernel) > 0) {
1112                 put_multipath_config(conf);
1113                 goto out;
1114         }
1115         put_multipath_config(conf);
1116
1117         if (!strncmp(uev->action, "add", 3)) {
1118                 r = uev_add_path(uev, vecs);
1119                 goto out;
1120         }
1121         if (!strncmp(uev->action, "remove", 6)) {
1122                 r = uev_remove_path(uev, vecs);
1123                 goto out;
1124         }
1125         if (!strncmp(uev->action, "change", 6)) {
1126                 r = uev_update_path(uev, vecs);
1127                 goto out;
1128         }
1129
1130 out:
1131         return r;
1132 }
1133
/*
 * Thread cleanup handler that calls rcu_unregister_thread().
 * The argument is not used; it only exists to match the handler
 * signature expected by pthread_cleanup_push().
 */
static void rcu_unregister(void *param)
{
        (void)param;
        rcu_unregister_thread();
}
1138
1139 static void *
1140 ueventloop (void * ap)
1141 {
1142         struct udev *udev = ap;
1143
1144         pthread_cleanup_push(rcu_unregister, NULL);
1145         rcu_register_thread();
1146         if (uevent_listen(udev))
1147                 condlog(0, "error starting uevent listener");
1148         pthread_cleanup_pop(1);
1149         return NULL;
1150 }
1151
1152 static void *
1153 uevqloop (void * ap)
1154 {
1155         pthread_cleanup_push(rcu_unregister, NULL);
1156         rcu_register_thread();
1157         if (uevent_dispatch(&uev_trigger, ap))
1158                 condlog(0, "error starting uevent dispatcher");
1159         pthread_cleanup_pop(1);
1160         return NULL;
1161 }
1162 static void *
1163 uxlsnrloop (void * ap)
1164 {
1165         if (cli_init()) {
1166                 condlog(1, "Failed to init uxsock listener");
1167                 return NULL;
1168         }
1169         pthread_cleanup_push(rcu_unregister, NULL);
1170         rcu_register_thread();
1171         set_handler_callback(LIST+PATHS, cli_list_paths);
1172         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1173         set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1174         set_handler_callback(LIST+PATH, cli_list_path);
1175         set_handler_callback(LIST+MAPS, cli_list_maps);
1176         set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1177         set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1178         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1179         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1180         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1181         set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1182         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1183         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1184         set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1185         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1186         set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1187         set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1188         set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1189         set_handler_callback(LIST+CONFIG, cli_list_config);
1190         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1191         set_handler_callback(LIST+DEVICES, cli_list_devices);
1192         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1193         set_handler_callback(ADD+PATH, cli_add_path);
1194         set_handler_callback(DEL+PATH, cli_del_path);
1195         set_handler_callback(ADD+MAP, cli_add_map);
1196         set_handler_callback(DEL+MAP, cli_del_map);
1197         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1198         set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1199         set_handler_callback(SUSPEND+MAP, cli_suspend);
1200         set_handler_callback(RESUME+MAP, cli_resume);
1201         set_handler_callback(RESIZE+MAP, cli_resize);
1202         set_handler_callback(RELOAD+MAP, cli_reload);
1203         set_handler_callback(RESET+MAP, cli_reassign);
1204         set_handler_callback(REINSTATE+PATH, cli_reinstate);
1205         set_handler_callback(FAIL+PATH, cli_fail);
1206         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1207         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1208         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1209         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1210         set_unlocked_handler_callback(QUIT, cli_quit);
1211         set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1212         set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1213         set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1214         set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1215         set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1216         set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1217
1218         umask(077);
1219         uxsock_listen(&uxsock_trigger, ap);
1220         pthread_cleanup_pop(1);
1221         return NULL;
1222 }
1223
1224 void
1225 exit_daemon (void)
1226 {
1227         post_config_state(DAEMON_SHUTDOWN);
1228 }
1229
1230 static void
1231 fail_path (struct path * pp, int del_active)
1232 {
1233         if (!pp->mpp)
1234                 return;
1235
1236         condlog(2, "checker failed path %s in map %s",
1237                  pp->dev_t, pp->mpp->alias);
1238
1239         dm_fail_path(pp->mpp->alias, pp->dev_t);
1240         if (del_active)
1241                 update_queue_mode_del_path(pp->mpp);
1242 }
1243
1244 /*
1245  * caller must have locked the path list before calling that function
1246  */
1247 static int
1248 reinstate_path (struct path * pp, int add_active)
1249 {
1250         int ret = 0;
1251
1252         if (!pp->mpp)
1253                 return 0;
1254
1255         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1256                 condlog(0, "%s: reinstate failed", pp->dev_t);
1257                 ret = 1;
1258         } else {
1259                 condlog(2, "%s: reinstated", pp->dev_t);
1260                 if (add_active)
1261                         update_queue_mode_add_path(pp->mpp);
1262         }
1263         return ret;
1264 }
1265
1266 static void
1267 enable_group(struct path * pp)
1268 {
1269         struct pathgroup * pgp;
1270
1271         /*
1272          * if path is added through uev_add_path, pgindex can be unset.
1273          * next update_strings() will set it, upon map reload event.
1274          *
1275          * we can safely return here, because upon map reload, all
1276          * PG will be enabled.
1277          */
1278         if (!pp->mpp->pg || !pp->pgindex)
1279                 return;
1280
1281         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1282
1283         if (pgp->status == PGSTATE_DISABLED) {
1284                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1285                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1286         }
1287 }
1288
1289 static void
1290 mpvec_garbage_collector (struct vectors * vecs)
1291 {
1292         struct multipath * mpp;
1293         unsigned int i;
1294
1295         if (!vecs->mpvec)
1296                 return;
1297
1298         vector_foreach_slot (vecs->mpvec, mpp, i) {
1299                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1300                         condlog(2, "%s: remove dead map", mpp->alias);
1301                         remove_map_and_stop_waiter(mpp, vecs, 1);
1302                         i--;
1303                 }
1304         }
1305 }
1306
1307 /* This is called after a path has started working again. It the multipath
1308  * device for this path uses the followover failback type, and this is the
1309  * best pathgroup, and this is the first path in the pathgroup to come back
1310  * up, then switch to this pathgroup */
1311 static int
1312 followover_should_failback(struct path * pp)
1313 {
1314         struct pathgroup * pgp;
1315         struct path *pp1;
1316         int i;
1317
1318         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1319             !pp->mpp->pg || !pp->pgindex ||
1320             pp->pgindex != pp->mpp->bestpg)
1321                 return 0;
1322
1323         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1324         vector_foreach_slot(pgp->paths, pp1, i) {
1325                 if (pp1 == pp)
1326                         continue;
1327                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1328                         return 0;
1329         }
1330         return 1;
1331 }
1332
1333 static void
1334 missing_uev_wait_tick(struct vectors *vecs)
1335 {
1336         struct multipath * mpp;
1337         unsigned int i;
1338         int timed_out = 0, delayed_reconfig;
1339         struct config *conf;
1340
1341         vector_foreach_slot (vecs->mpvec, mpp, i) {
1342                 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1343                         timed_out = 1;
1344                         condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1345                         if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1346                                 /* update_map removed map */
1347                                 i--;
1348                                 continue;
1349                         }
1350                         mpp->wait_for_udev = 0;
1351                 }
1352         }
1353
1354         conf = get_multipath_config();
1355         delayed_reconfig = conf->delayed_reconfig;
1356         put_multipath_config(conf);
1357         if (timed_out && delayed_reconfig &&
1358             !need_to_delay_reconfig(vecs)) {
1359                 condlog(2, "reconfigure (delayed)");
1360                 set_config_state(DAEMON_CONFIGURE);
1361         }
1362 }
1363
1364 static void
1365 defered_failback_tick (vector mpvec)
1366 {
1367         struct multipath * mpp;
1368         unsigned int i;
1369
1370         vector_foreach_slot (mpvec, mpp, i) {
1371                 /*
1372                  * defered failback getting sooner
1373                  */
1374                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1375                         mpp->failback_tick--;
1376
1377                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1378                                 switch_pathgroup(mpp);
1379                 }
1380         }
1381 }
1382
1383 static void
1384 retry_count_tick(vector mpvec)
1385 {
1386         struct multipath *mpp;
1387         unsigned int i;
1388
1389         vector_foreach_slot (mpvec, mpp, i) {
1390                 if (mpp->retry_tick > 0) {
1391                         mpp->stat_total_queueing_time++;
1392                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1393                         if(--mpp->retry_tick == 0) {
1394                                 dm_queue_if_no_path(mpp->alias, 0);
1395                                 condlog(2, "%s: Disable queueing", mpp->alias);
1396                         }
1397                 }
1398         }
1399 }
1400
1401 int update_prio(struct path *pp, int refresh_all)
1402 {
1403         int oldpriority;
1404         struct path *pp1;
1405         struct pathgroup * pgp;
1406         int i, j, changed = 0;
1407         struct config *conf;
1408
1409         if (refresh_all) {
1410                 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1411                         vector_foreach_slot (pgp->paths, pp1, j) {
1412                                 oldpriority = pp1->priority;
1413                                 conf = get_multipath_config();
1414                                 pathinfo(pp1, conf, DI_PRIO);
1415                                 put_multipath_config(conf);
1416                                 if (pp1->priority != oldpriority)
1417                                         changed = 1;
1418                         }
1419                 }
1420                 return changed;
1421         }
1422         oldpriority = pp->priority;
1423         conf = get_multipath_config();
1424         pathinfo(pp, conf, DI_PRIO);
1425         put_multipath_config(conf);
1426
1427         if (pp->priority == oldpriority)
1428                 return 0;
1429         return 1;
1430 }
1431
/*
 * Reload a map and resynchronise the daemon's view of it.
 *
 * Steps: reload_map(), dm_lib_release(), setup_multipath(),
 * sync_map_state().
 *
 * Returns 0 on success, 1 if reload_map() or setup_multipath() reports
 * a failure.
 */
int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
{
        if (reload_map(vecs, mpp, refresh, 1) != 0)
                return 1;

        dm_lib_release();

        if (setup_multipath(vecs, mpp) != 0)
                return 1;

        sync_map_state(mpp);
        return 0;
}
1444
1445 void repair_path(struct path * pp)
1446 {
1447         if (pp->state != PATH_DOWN)
1448                 return;
1449
1450         checker_repair(&pp->checker);
1451         if (strlen(checker_message(&pp->checker)))
1452                 LOG_MSG(1, checker_message(&pp->checker));
1453 }
1454
1455 /*
1456  * Returns '1' if the path has been checked, '-1' if it was blacklisted
1457  * and '0' otherwise
1458  */
1459 int
1460 check_path (struct vectors * vecs, struct path * pp, int ticks)
1461 {
1462         int newstate;
1463         int new_path_up = 0;
1464         int chkr_new_path_up = 0;
1465         int add_active;
1466         int disable_reinstate = 0;
1467         int oldchkrstate = pp->chkrstate;
1468         int retrigger_tries, checkint;
1469         struct config *conf;
1470         int ret;
1471
1472         if ((pp->initialized == INIT_OK ||
1473              pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
1474                 return 0;
1475
1476         if (pp->tick)
1477                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1478         if (pp->tick)
1479                 return 0; /* don't check this path yet */
1480
1481         conf = get_multipath_config();
1482         retrigger_tries = conf->retrigger_tries;
1483         checkint = conf->checkint;
1484         put_multipath_config(conf);
1485         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1486             pp->retriggers < retrigger_tries) {
1487                 condlog(2, "%s: triggering change event to reinitialize",
1488                         pp->dev);
1489                 pp->initialized = INIT_REQUESTED_UDEV;
1490                 pp->retriggers++;
1491                 sysfs_attr_set_value(pp->udev, "uevent", "change",
1492                                      strlen("change"));
1493                 return 0;
1494         }
1495
1496         /*
1497          * provision a next check soonest,
1498          * in case we exit abnormaly from here
1499          */
1500         pp->tick = checkint;
1501
1502         newstate = path_offline(pp);
1503         /*
1504          * Wait for uevent for removed paths;
1505          * some LLDDs like zfcp keep paths unavailable
1506          * without sending uevents.
1507          */
1508         if (newstate == PATH_REMOVED)
1509                 newstate = PATH_DOWN;
1510
1511         if (newstate == PATH_UP) {
1512                 conf = get_multipath_config();
1513                 newstate = get_state(pp, conf, 1);
1514                 put_multipath_config(conf);
1515         } else
1516                 checker_clear_message(&pp->checker);
1517
1518         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1519                 condlog(2, "%s: unusable path", pp->dev);
1520                 conf = get_multipath_config();
1521                 pathinfo(pp, conf, 0);
1522                 put_multipath_config(conf);
1523                 return 1;
1524         }
1525         if (!pp->mpp) {
1526                 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1527                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
1528                         condlog(2, "%s: add missing path", pp->dev);
1529                         conf = get_multipath_config();
1530                         ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
1531                         if (ret == PATHINFO_OK) {
1532                                 ev_add_path(pp, vecs);
1533                                 pp->tick = 1;
1534                         } else if (ret == PATHINFO_SKIPPED) {
1535                                 put_multipath_config(conf);
1536                                 return -1;
1537                         }
1538                         put_multipath_config(conf);
1539                 }
1540                 return 0;
1541         }
1542         /*
1543          * Async IO in flight. Keep the previous path state
1544          * and reschedule as soon as possible
1545          */
1546         if (newstate == PATH_PENDING) {
1547                 pp->tick = 1;
1548                 return 0;
1549         }
1550         /*
1551          * Synchronize with kernel state
1552          */
1553         if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
1554                 condlog(1, "%s: Could not synchronize with kernel state",
1555                         pp->dev);
1556                 pp->dmstate = PSTATE_UNDEF;
1557         }
1558         /* if update_multipath_strings orphaned the path, quit early */
1559         if (!pp->mpp)
1560                 return 0;
1561
1562         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1563              pp->wait_checks > 0) {
1564                 if (pp->mpp && pp->mpp->nr_active > 0) {
1565                         pp->state = PATH_DELAYED;
1566                         pp->wait_checks--;
1567                         return 1;
1568                 } else
1569                         pp->wait_checks = 0;
1570         }
1571
1572         /*
1573          * don't reinstate failed path, if its in stand-by
1574          * and if target supports only implicit tpgs mode.
1575          * this will prevent unnecessary i/o by dm on stand-by
1576          * paths if there are no other active paths in map.
1577          */
1578         disable_reinstate = (newstate == PATH_GHOST &&
1579                             pp->mpp->nr_active == 0 &&
1580                             pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1581
1582         pp->chkrstate = newstate;
1583         if (newstate != pp->state) {
1584                 int oldstate = pp->state;
1585                 pp->state = newstate;
1586
1587                 if (strlen(checker_message(&pp->checker)))
1588                         LOG_MSG(1, checker_message(&pp->checker));
1589
1590                 /*
1591                  * upon state change, reset the checkint
1592                  * to the shortest delay
1593                  */
1594                 conf = get_multipath_config();
1595                 pp->checkint = conf->checkint;
1596                 put_multipath_config(conf);
1597
1598                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY) {
1599                         /*
1600                          * proactively fail path in the DM
1601                          */
1602                         if (oldstate == PATH_UP ||
1603                             oldstate == PATH_GHOST) {
1604                                 fail_path(pp, 1);
1605                                 if (pp->mpp->delay_wait_checks > 0 &&
1606                                     pp->watch_checks > 0) {
1607                                         pp->wait_checks = pp->mpp->delay_wait_checks;
1608                                         pp->watch_checks = 0;
1609                                 }
1610                         }else
1611                                 fail_path(pp, 0);
1612
1613                         /*
1614                          * cancel scheduled failback
1615                          */
1616                         pp->mpp->failback_tick = 0;
1617
1618                         pp->mpp->stat_path_failures++;
1619                         repair_path(pp);
1620                         return 1;
1621                 }
1622
1623                 if(newstate == PATH_UP || newstate == PATH_GHOST){
1624                         if ( pp->mpp && pp->mpp->prflag ){
1625                                 /*
1626                                  * Check Persistent Reservation.
1627                                  */
1628                         condlog(2, "%s: checking persistent reservation "
1629                                 "registration", pp->dev);
1630                         mpath_pr_event_handle(pp);
1631                         }
1632                 }
1633
1634                 /*
1635                  * reinstate this path
1636                  */
1637                 if (oldstate != PATH_UP &&
1638                     oldstate != PATH_GHOST) {
1639                         if (pp->mpp->delay_watch_checks > 0)
1640                                 pp->watch_checks = pp->mpp->delay_watch_checks;
1641                         add_active = 1;
1642                 } else {
1643                         if (pp->watch_checks > 0)
1644                                 pp->watch_checks--;
1645                         add_active = 0;
1646                 }
1647                 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1648                         condlog(3, "%s: reload map", pp->dev);
1649                         ev_add_path(pp, vecs);
1650                         pp->tick = 1;
1651                         return 0;
1652                 }
1653                 new_path_up = 1;
1654
1655                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1656                         chkr_new_path_up = 1;
1657
1658                 /*
1659                  * if at least one path is up in a group, and
1660                  * the group is disabled, re-enable it
1661                  */
1662                 if (newstate == PATH_UP)
1663                         enable_group(pp);
1664         }
1665         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1666                 if ((pp->dmstate == PSTATE_FAILED ||
1667                     pp->dmstate == PSTATE_UNDEF) &&
1668                     !disable_reinstate) {
1669                         /* Clear IO errors */
1670                         if (reinstate_path(pp, 0)) {
1671                                 condlog(3, "%s: reload map", pp->dev);
1672                                 ev_add_path(pp, vecs);
1673                                 pp->tick = 1;
1674                                 return 0;
1675                         }
1676                 } else {
1677                         unsigned int max_checkint;
1678                         LOG_MSG(4, checker_message(&pp->checker));
1679                         conf = get_multipath_config();
1680                         max_checkint = conf->max_checkint;
1681                         put_multipath_config(conf);
1682                         if (pp->checkint != max_checkint) {
1683                                 /*
1684                                  * double the next check delay.
1685                                  * max at conf->max_checkint
1686                                  */
1687                                 if (pp->checkint < (max_checkint / 2))
1688                                         pp->checkint = 2 * pp->checkint;
1689                                 else
1690                                         pp->checkint = max_checkint;
1691
1692                                 condlog(4, "%s: delay next check %is",
1693                                         pp->dev_t, pp->checkint);
1694                         }
1695                         if (pp->watch_checks > 0)
1696                                 pp->watch_checks--;
1697                         pp->tick = pp->checkint;
1698                 }
1699         }
1700         else if (newstate == PATH_DOWN &&
1701                  strlen(checker_message(&pp->checker))) {
1702                 int log_checker_err;
1703
1704                 conf = get_multipath_config();
1705                 log_checker_err = conf->log_checker_err;
1706                 put_multipath_config(conf);
1707                 if (log_checker_err == LOG_CHKR_ERR_ONCE)
1708                         LOG_MSG(3, checker_message(&pp->checker));
1709                 else
1710                         LOG_MSG(2, checker_message(&pp->checker));
1711         }
1712
1713         pp->state = newstate;
1714         repair_path(pp);
1715
1716         if (pp->mpp->wait_for_udev)
1717                 return 1;
1718         /*
1719          * path prio refreshing
1720          */
1721         condlog(4, "path prio refresh");
1722
1723         if (update_prio(pp, new_path_up) &&
1724             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1725              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1726                 update_path_groups(pp->mpp, vecs, !new_path_up);
1727         else if (need_switch_pathgroup(pp->mpp, 0)) {
1728                 if (pp->mpp->pgfailback > 0 &&
1729                     (new_path_up || pp->mpp->failback_tick <= 0))
1730                         pp->mpp->failback_tick =
1731                                 pp->mpp->pgfailback + 1;
1732                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1733                          (chkr_new_path_up && followover_should_failback(pp)))
1734                         switch_pathgroup(pp->mpp);
1735         }
1736         return 1;
1737 }
1738
1739 static void *
1740 checkerloop (void *ap)
1741 {
1742         struct vectors *vecs;
1743         struct path *pp;
1744         int count = 0;
1745         unsigned int i;
1746         struct itimerval timer_tick_it;
1747         struct timeval last_time;
1748         struct config *conf;
1749
1750         pthread_cleanup_push(rcu_unregister, NULL);
1751         rcu_register_thread();
1752         mlockall(MCL_CURRENT | MCL_FUTURE);
1753         vecs = (struct vectors *)ap;
1754         condlog(2, "path checkers start up");
1755
1756         /*
1757          * init the path check interval
1758          */
1759         vector_foreach_slot (vecs->pathvec, pp, i) {
1760                 conf = get_multipath_config();
1761                 pp->checkint = conf->checkint;
1762                 put_multipath_config(conf);
1763         }
1764
1765         /* Tweak start time for initial path check */
1766         if (gettimeofday(&last_time, NULL) != 0)
1767                 last_time.tv_sec = 0;
1768         else
1769                 last_time.tv_sec -= 1;
1770
1771         while (1) {
1772                 struct timeval diff_time, start_time, end_time;
1773                 int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
1774                 sigset_t mask;
1775
1776                 if (gettimeofday(&start_time, NULL) != 0)
1777                         start_time.tv_sec = 0;
1778                 if (start_time.tv_sec && last_time.tv_sec) {
1779                         timersub(&start_time, &last_time, &diff_time);
1780                         condlog(4, "tick (%lu.%06lu secs)",
1781                                 diff_time.tv_sec, diff_time.tv_usec);
1782                         last_time.tv_sec = start_time.tv_sec;
1783                         last_time.tv_usec = start_time.tv_usec;
1784                         ticks = diff_time.tv_sec;
1785                 } else {
1786                         ticks = 1;
1787                         condlog(4, "tick (%d ticks)", ticks);
1788                 }
1789 #ifdef USE_SYSTEMD
1790                 if (use_watchdog)
1791                         sd_notify(0, "WATCHDOG=1");
1792 #endif
1793                 rc = set_config_state(DAEMON_RUNNING);
1794                 if (rc == ETIMEDOUT) {
1795                         condlog(4, "timeout waiting for DAEMON_IDLE");
1796                         continue;
1797                 }
1798                 if (vecs->pathvec) {
1799                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1800                         lock(&vecs->lock);
1801                         pthread_testcancel();
1802                         vector_foreach_slot (vecs->pathvec, pp, i) {
1803                                 rc = check_path(vecs, pp, ticks);
1804                                 if (rc < 0) {
1805                                         vector_del_slot(vecs->pathvec, i);
1806                                         free_path(pp);
1807                                         i--;
1808                                 } else
1809                                         num_paths += rc;
1810                         }
1811                         lock_cleanup_pop(vecs->lock);
1812                 }
1813                 if (vecs->mpvec) {
1814                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1815                         lock(&vecs->lock);
1816                         pthread_testcancel();
1817                         defered_failback_tick(vecs->mpvec);
1818                         retry_count_tick(vecs->mpvec);
1819                         missing_uev_wait_tick(vecs);
1820                         lock_cleanup_pop(vecs->lock);
1821                 }
1822                 if (count)
1823                         count--;
1824                 else {
1825                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1826                         lock(&vecs->lock);
1827                         pthread_testcancel();
1828                         condlog(4, "map garbage collection");
1829                         mpvec_garbage_collector(vecs);
1830                         count = MAPGCINT;
1831                         lock_cleanup_pop(vecs->lock);
1832                 }
1833
1834                 diff_time.tv_usec = 0;
1835                 if (start_time.tv_sec &&
1836                     gettimeofday(&end_time, NULL) == 0) {
1837                         timersub(&end_time, &start_time, &diff_time);
1838                         if (num_paths) {
1839                                 unsigned int max_checkint;
1840
1841                                 condlog(3, "checked %d path%s in %lu.%06lu secs",
1842                                         num_paths, num_paths > 1 ? "s" : "",
1843                                         diff_time.tv_sec, diff_time.tv_usec);
1844                                 conf = get_multipath_config();
1845                                 max_checkint = conf->max_checkint;
1846                                 put_multipath_config(conf);
1847                                 if (diff_time.tv_sec > max_checkint)
1848                                         condlog(1, "path checkers took longer "
1849                                                 "than %lu seconds, consider "
1850                                                 "increasing max_polling_interval",
1851                                                 diff_time.tv_sec);
1852                         }
1853                 }
1854
1855                 post_config_state(DAEMON_IDLE);
1856                 conf = get_multipath_config();
1857                 strict_timing = conf->strict_timing;
1858                 put_multipath_config(conf);
1859                 if (!strict_timing)
1860                         sleep(1);
1861                 else {
1862                         timer_tick_it.it_interval.tv_sec = 0;
1863                         timer_tick_it.it_interval.tv_usec = 0;
1864                         if (diff_time.tv_usec) {
1865                                 timer_tick_it.it_value.tv_sec = 0;
1866                                 timer_tick_it.it_value.tv_usec =
1867                                         (unsigned long)1000000 - diff_time.tv_usec;
1868                         } else {
1869                                 timer_tick_it.it_value.tv_sec = 1;
1870                                 timer_tick_it.it_value.tv_usec = 0;
1871                         }
1872                         setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1873
1874                         sigemptyset(&mask);
1875                         sigaddset(&mask, SIGALRM);
1876                         condlog(3, "waiting for %lu.%06lu secs",
1877                                 timer_tick_it.it_value.tv_sec,
1878                                 timer_tick_it.it_value.tv_usec);
1879                         if (sigwait(&mask, &signo) != 0) {
1880                                 condlog(3, "sigwait failed with error %d",
1881                                         errno);
1882                                 conf = get_multipath_config();
1883                                 conf->strict_timing = 0;
1884                                 put_multipath_config(conf);
1885                                 break;
1886                         }
1887                 }
1888         }
1889         pthread_cleanup_pop(1);
1890         return NULL;
1891 }
1892
1893 int
1894 configure (struct vectors * vecs, int start_waiters)
1895 {
1896         struct multipath * mpp;
1897         struct path * pp;
1898         vector mpvec;
1899         int i, ret;
1900         struct config *conf;
1901
1902         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1903                 return 1;
1904
1905         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1906                 return 1;
1907
1908         if (!(mpvec = vector_alloc()))
1909                 return 1;
1910
1911         /*
1912          * probe for current path (from sysfs) and map (from dm) sets
1913          */
1914         ret = path_discovery(vecs->pathvec, DI_ALL);
1915         if (ret < 0)
1916                 return 1;
1917
1918         vector_foreach_slot (vecs->pathvec, pp, i){
1919                 conf = get_multipath_config();
1920                 if (filter_path(conf, pp) > 0){
1921                         vector_del_slot(vecs->pathvec, i);
1922                         free_path(pp);
1923                         i--;
1924                 }
1925                 else
1926                         pp->checkint = conf->checkint;
1927                 put_multipath_config(conf);
1928         }
1929         if (map_discovery(vecs))
1930                 return 1;
1931
1932         /*
1933          * create new set of maps & push changed ones into dm
1934          */
1935         if (coalesce_paths(vecs, mpvec, NULL, 1, CMD_NONE))
1936                 return 1;
1937
1938         /*
1939          * may need to remove some maps which are no longer relevant
1940          * e.g., due to blacklist changes in conf file
1941          */
1942         if (coalesce_maps(vecs, mpvec))
1943                 return 1;
1944
1945         dm_lib_release();
1946
1947         sync_maps_state(mpvec);
1948         vector_foreach_slot(mpvec, mpp, i){
1949                 remember_wwid(mpp->wwid);
1950                 update_map_pr(mpp);
1951         }
1952
1953         /*
1954          * purge dm of old maps
1955          */
1956         remove_maps(vecs);
1957
1958         /*
1959          * save new set of maps formed by considering current path state
1960          */
1961         vector_free(vecs->mpvec);
1962         vecs->mpvec = mpvec;
1963
1964         /*
1965          * start dm event waiter threads for these new maps
1966          */
1967         vector_foreach_slot(vecs->mpvec, mpp, i) {
1968                 if (setup_multipath(vecs, mpp))
1969                         return 1;
1970                 if (start_waiters)
1971                         if (start_waiter_thread(mpp, vecs))
1972                                 return 1;
1973         }
1974         return 0;
1975 }
1976
1977 int
1978 need_to_delay_reconfig(struct vectors * vecs)
1979 {
1980         struct multipath *mpp;
1981         int i;
1982
1983         if (!VECTOR_SIZE(vecs->mpvec))
1984                 return 0;
1985
1986         vector_foreach_slot(vecs->mpvec, mpp, i) {
1987                 if (mpp->wait_for_udev)
1988                         return 1;
1989         }
1990         return 0;
1991 }
1992
1993 void rcu_free_config(struct rcu_head *head)
1994 {
1995         struct config *conf = container_of(head, struct config, rcu);
1996
1997         free_config(conf);
1998 }
1999
2000 int
2001 reconfigure (struct vectors * vecs)
2002 {
2003         struct config * old, *conf;
2004
2005         conf = load_config(DEFAULT_CONFIGFILE);
2006         if (!conf)
2007                 return 1;
2008
2009         /*
2010          * free old map and path vectors ... they use old conf state
2011          */
2012         if (VECTOR_SIZE(vecs->mpvec))
2013                 remove_maps_and_stop_waiters(vecs);
2014
2015         free_pathvec(vecs->pathvec, FREE_PATHS);
2016         vecs->pathvec = NULL;
2017
2018         /* Re-read any timezone changes */
2019         tzset();
2020
2021         dm_drv_version(conf->version, TGT_MPATH);
2022         if (verbosity)
2023                 conf->verbosity = verbosity;
2024         if (bindings_read_only)
2025                 conf->bindings_read_only = bindings_read_only;
2026         if (ignore_new_devs)
2027                 conf->ignore_new_devs = ignore_new_devs;
2028         uxsock_timeout = conf->uxsock_timeout;
2029
2030         old = rcu_dereference(multipath_conf);
2031         rcu_assign_pointer(multipath_conf, conf);
2032         call_rcu(&old->rcu, rcu_free_config);
2033
2034         configure(vecs, 1);
2035
2036
2037         return 0;
2038 }
2039
2040 static struct vectors *
2041 init_vecs (void)
2042 {
2043         struct vectors * vecs;
2044
2045         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2046
2047         if (!vecs)
2048                 return NULL;
2049
2050         pthread_mutex_init(&vecs->lock.mutex, NULL);
2051
2052         return vecs;
2053 }
2054
/*
 * Install @func as the handler for signal @signo using sigaction(),
 * with an empty handler mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR if sigaction()
 * failed; both are returned converted to void *.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	/* Start from an all-zero structure so no field is left undefined */
	struct sigaction sig = { .sa_flags = 0 };
	struct sigaction osig;

	sig.sa_handler = func;
	sigemptyset(&sig.sa_mask);

	if (sigaction(signo, &sig, &osig) < 0)
		return (void *)SIG_ERR;

	/*
	 * The function pointer is converted explicitly; an implicit
	 * function-pointer to void * conversion is not valid ISO C.
	 */
	return (void *)osig.sa_handler;
}
2073
2074 void
2075 handle_signals(void)
2076 {
2077         if (exit_sig) {
2078                 condlog(2, "exit (signal)");
2079                 exit_daemon();
2080         }
2081         if (reconfig_sig) {
2082                 condlog(2, "reconfigure (signal)");
2083                 set_config_state(DAEMON_CONFIGURE);
2084         }
2085         if (log_reset_sig) {
2086                 condlog(2, "reset log (signal)");
2087                 pthread_mutex_lock(&logq_lock);
2088                 log_reset("multipathd");
2089                 pthread_mutex_unlock(&logq_lock);
2090         }
2091         exit_sig = 0;
2092         reconfig_sig = 0;
2093         log_reset_sig = 0;
2094 }
2095
2096 static void
2097 sighup (int sig)
2098 {
2099         reconfig_sig = 1;
2100 }
2101
2102 static void
2103 sigend (int sig)
2104 {
2105         exit_sig = 1;
2106 }
2107
2108 static void
2109 sigusr1 (int sig)
2110 {
2111         log_reset_sig = 1;
2112 }
2113
/*
 * Signal handler for SIGUSR2: only logs that the signal arrived.
 *
 * NOTE(review): condlog() is called from signal context here - confirm
 * that it is safe to do so.
 */
static void
sigusr2 (int sig)
{
	(void)sig;	/* signal number is not needed */
	condlog(3, "SIGUSR2 received");
}
2119
2120 static void
2121 signal_init(void)
2122 {
2123         sigset_t set;
2124
2125         sigemptyset(&set);
2126         sigaddset(&set, SIGPIPE);
2127         pthread_sigmask(SIG_SETMASK, &set, NULL);
2128
2129         signal_set(SIGHUP, sighup);
2130         signal_set(SIGUSR1, sigusr1);
2131         signal_set(SIGUSR2, sigusr2);
2132         signal_set(SIGINT, sigend);
2133         signal_set(SIGTERM, sigend);
2134         signal(SIGPIPE, SIG_IGN);
2135 }
2136
/*
 * Switch the calling process to the SCHED_RR policy at priority 99.
 * A failure is only logged; the daemon keeps running with its current
 * scheduling policy.
 */
static void
setscheduler (void)
{
	static struct sched_param rr_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler(0, SCHED_RR, &rr_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
2151
2152 static void
2153 set_oom_adj (void)
2154 {
2155 #ifdef OOM_SCORE_ADJ_MIN
2156         int retry = 1;
2157         char *file = "/proc/self/oom_score_adj";
2158         int score = OOM_SCORE_ADJ_MIN;
2159 #else
2160         int retry = 0;
2161         char *file = "/proc/self/oom_adj";
2162         int score = OOM_ADJUST_MIN;
2163 #endif
2164         FILE *fp;
2165         struct stat st;
2166         char *envp;
2167
2168         envp = getenv("OOMScoreAdjust");
2169         if (envp) {
2170                 condlog(3, "Using systemd provided OOMScoreAdjust");
2171                 return;
2172         }
2173         do {
2174                 if (stat(file, &st) == 0){
2175                         fp = fopen(file, "w");
2176                         if (!fp) {
2177                                 condlog(0, "couldn't fopen %s : %s", file,
2178                                         strerror(errno));
2179                                 return;
2180                         }
2181                         fprintf(fp, "%i", score);
2182                         fclose(fp);
2183                         return;
2184                 }
2185                 if (errno != ENOENT) {
2186                         condlog(0, "couldn't stat %s : %s", file,
2187                                 strerror(errno));
2188                         return;
2189                 }
2190 #ifdef OOM_ADJUST_MIN
2191                 file = "/proc/self/oom_adj";
2192                 score = OOM_ADJUST_MIN;
2193 #else
2194                 retry = 0;
2195 #endif
2196         } while (retry--);
2197         condlog(0, "couldn't adjust oom score");
2198 }
2199
2200 static int
2201 child (void * param)
2202 {
2203         pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2204         pthread_attr_t log_attr, misc_attr, uevent_attr;
2205         struct vectors * vecs;
2206         struct multipath * mpp;
2207         int i;
2208 #ifdef USE_SYSTEMD
2209         unsigned long checkint;
2210 #endif
2211         int rc;
2212         int pid_fd = -1;
2213         struct config *conf;
2214         char *envp;
2215
2216         mlockall(MCL_CURRENT | MCL_FUTURE);
2217         signal_init();
2218         rcu_init();
2219
2220         setup_thread_attr(&misc_attr, 64 * 1024, 1);
2221         setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 1);
2222         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2223
2224         if (logsink == 1) {
2225                 setup_thread_attr(&log_attr, 64 * 1024, 0);
2226                 log_thread_start(&log_attr);
2227                 pthread_attr_destroy(&log_attr);
2228         }
2229         pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2230         if (pid_fd < 0) {
2231                 condlog(1, "failed to create pidfile");
2232                 if (logsink == 1)
2233                         log_thread_stop();
2234                 exit(1);
2235         }
2236
2237         post_config_state(DAEMON_START);
2238
2239         condlog(2, "--------start up--------");
2240         condlog(2, "read " DEFAULT_CONFIGFILE);
2241
2242         conf = load_config(DEFAULT_CONFIGFILE);
2243         if (!conf)
2244                 goto failed;
2245
2246         if (verbosity)
2247                 conf->verbosity = verbosity;
2248         if (bindings_read_only)
2249                 conf->bindings_read_only = bindings_read_only;
2250         if (ignore_new_devs)
2251                 conf->ignore_new_devs = ignore_new_devs;
2252         uxsock_timeout = conf->uxsock_timeout;
2253         rcu_assign_pointer(multipath_conf, conf);
2254         dm_init(conf->verbosity);
2255         dm_drv_version(conf->version, TGT_MPATH);
2256         if (init_checkers(conf->multipath_dir)) {
2257                 condlog(0, "failed to initialize checkers");
2258                 goto failed;
2259         }
2260         if (init_prio(conf->multipath_dir)) {
2261                 condlog(0, "failed to initialize prioritizers");
2262                 goto failed;
2263         }
2264
2265         setlogmask(LOG_UPTO(conf->verbosity + 3));
2266
2267         envp = getenv("LimitNOFILE");
2268
2269         if (envp) {
2270                 condlog(2,"Using systemd provided open fds limit of %s", envp);
2271         } else if (conf->max_fds) {
2272                 struct rlimit fd_limit;
2273
2274                 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2275                         condlog(0, "can't get open fds limit: %s",
2276                                 strerror(errno));
2277                         fd_limit.rlim_cur = 0;
2278                         fd_limit.rlim_max = 0;
2279                 }
2280                 if (fd_limit.rlim_cur < conf->max_fds) {
2281                         fd_limit.rlim_cur = conf->max_fds;
2282                         if (fd_limit.rlim_max < conf->max_fds)
2283                                 fd_limit.rlim_max = conf->max_fds;
2284                         if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2285                                 condlog(0, "can't set open fds limit to "
2286                                         "%lu/%lu : %s",
2287                                         fd_limit.rlim_cur, fd_limit.rlim_max,
2288                                         strerror(errno));
2289                         } else {
2290                                 condlog(3, "set open fds limit to %lu/%lu",
2291                                         fd_limit.rlim_cur, fd_limit.rlim_max);
2292                         }
2293                 }
2294
2295         }
2296
2297         vecs = gvecs = init_vecs();
2298         if (!vecs)
2299                 goto failed;
2300
2301         setscheduler();
2302         set_oom_adj();
2303
2304         dm_udev_set_sync_support(0);
2305 #ifdef USE_SYSTEMD
2306         envp = getenv("WATCHDOG_USEC");
2307         if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2308                 /* Value is in microseconds */
2309                 conf->max_checkint = checkint / 1000000;
2310                 /* Rescale checkint */
2311                 if (conf->checkint > conf->max_checkint)
2312                         conf->checkint = conf->max_checkint;
2313                 else
2314                         conf->checkint = conf->max_checkint / 4;
2315                 condlog(3, "enabling watchdog, interval %d max %d",
2316                         conf->checkint, conf->max_checkint);
2317                 use_watchdog = conf->checkint;
2318         }
2319 #endif
2320         /*
2321          * Startup done, invalidate configuration
2322          */
2323         conf = NULL;
2324
2325         /*
2326          * Signal start of configuration
2327          */
2328         post_config_state(DAEMON_CONFIGURE);
2329
2330         /*
2331          * Start uevent listener early to catch events
2332          */
2333         if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2334                 condlog(0, "failed to create uevent thread: %d", rc);
2335                 goto failed;
2336         }
2337         pthread_attr_destroy(&uevent_attr);
2338         if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2339                 condlog(0, "failed to create cli listener: %d", rc);
2340                 goto failed;
2341         }
2342
2343         /*
2344          * start threads
2345          */
2346         if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2347                 condlog(0,"failed to create checker loop thread: %d", rc);
2348                 goto failed;
2349         }
2350         if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2351                 condlog(0, "failed to create uevent dispatcher: %d", rc);
2352                 goto failed;
2353         }
2354         pthread_attr_destroy(&misc_attr);
2355
2356 #ifdef USE_SYSTEMD
2357         sd_notify(0, "READY=1");
2358 #endif
2359
2360         while (running_state != DAEMON_SHUTDOWN) {
2361                 pthread_cleanup_push(config_cleanup, NULL);
2362                 pthread_mutex_lock(&config_lock);
2363                 if (running_state != DAEMON_CONFIGURE &&
2364                     running_state != DAEMON_SHUTDOWN) {
2365                         pthread_cond_wait(&config_cond, &config_lock);
2366                 }
2367                 pthread_cleanup_pop(1);
2368                 if (running_state == DAEMON_CONFIGURE) {
2369                         pthread_cleanup_push(cleanup_lock, &vecs->lock);
2370                         lock(&vecs->lock);
2371                         pthread_testcancel();
2372                         if (!need_to_delay_reconfig(vecs)) {
2373                                 reconfigure(vecs);
2374                         } else {
2375                                 conf = get_multipath_config();
2376                                 conf->delayed_reconfig = 1;
2377                                 put_multipath_config(conf);
2378                         }
2379                         lock_cleanup_pop(vecs->lock);
2380                         post_config_state(DAEMON_IDLE);
2381                 }
2382         }
2383
2384         lock(&vecs->lock);
2385         conf = get_multipath_config();
2386         if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2387                 vector_foreach_slot(vecs->mpvec, mpp, i)
2388                         dm_queue_if_no_path(mpp->alias, 0);
2389         put_multipath_config(conf);
2390         remove_maps_and_stop_waiters(vecs);
2391         unlock(&vecs->lock);
2392
2393         pthread_cancel(check_thr);
2394         pthread_cancel(uevent_thr);
2395         pthread_cancel(uxlsnr_thr);
2396         pthread_cancel(uevq_thr);
2397
2398         pthread_join(check_thr, NULL);
2399         pthread_join(uevent_thr, NULL);
2400         pthread_join(uxlsnr_thr, NULL);
2401         pthread_join(uevq_thr, NULL);
2402
2403         lock(&vecs->lock);
2404         free_pathvec(vecs->pathvec, FREE_PATHS);
2405         vecs->pathvec = NULL;
2406         unlock(&vecs->lock);
2407
2408         pthread_mutex_destroy(&vecs->lock.mutex);
2409         FREE(vecs);
2410         vecs = NULL;
2411
2412         cleanup_checkers();
2413         cleanup_prio();
2414
2415         dm_lib_release();
2416         dm_lib_exit();
2417
2418         /* We're done here */
2419         condlog(3, "unlink pidfile");
2420         unlink(DEFAULT_PIDFILE);
2421
2422         condlog(2, "--------shut down-------");
2423
2424         if (logsink == 1)
2425                 log_thread_stop();
2426
2427         /*
2428          * Freeing config must be done after condlog() and dm_lib_exit(),
2429          * because logging functions like dlog() and dm_write_log()
2430          * reference the config.
2431          */
2432         conf = rcu_dereference(multipath_conf);
2433         rcu_assign_pointer(multipath_conf, NULL);
2434         call_rcu(&conf->rcu, rcu_free_config);
2435         udev_unref(udev);
2436         udev = NULL;
2437         pthread_attr_destroy(&waiter_attr);
2438 #ifdef _DEBUG_
2439         dbg_free_final(NULL);
2440 #endif
2441
2442 #ifdef USE_SYSTEMD
2443         sd_notify(0, "ERRNO=0");
2444 #endif
2445         exit(0);
2446
2447 failed:
2448 #ifdef USE_SYSTEMD
2449         sd_notify(0, "ERRNO=1");
2450 #endif
2451         if (pid_fd >= 0)
2452                 close(pid_fd);
2453         exit(1);
2454 }
2455
2456 static int
2457 daemonize(void)
2458 {
2459         int pid;
2460         int dev_null_fd;
2461
2462         if( (pid = fork()) < 0){
2463                 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
2464                 return -1;
2465         }
2466         else if (pid != 0)
2467                 return pid;
2468
2469         setsid();
2470
2471         if ( (pid = fork()) < 0)
2472                 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
2473         else if (pid != 0)
2474                 _exit(0);
2475
2476         if (chdir("/") < 0)
2477                 fprintf(stderr, "cannot chdir to '/', continuing\n");
2478
2479         dev_null_fd = open("/dev/null", O_RDWR);
2480         if (dev_null_fd < 0){
2481                 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
2482                         strerror(errno));
2483                 _exit(0);
2484         }
2485
2486         close(STDIN_FILENO);
2487         if (dup(dev_null_fd) < 0) {
2488                 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
2489                         strerror(errno));
2490                 _exit(0);
2491         }
2492         close(STDOUT_FILENO);
2493         if (dup(dev_null_fd) < 0) {
2494                 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
2495                         strerror(errno));
2496                 _exit(0);
2497         }
2498         close(STDERR_FILENO);
2499         if (dup(dev_null_fd) < 0) {
2500                 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
2501                         strerror(errno));
2502                 _exit(0);
2503         }
2504         close(dev_null_fd);
2505         daemon_pid = getpid();
2506         return 0;
2507 }
2508
2509 int
2510 main (int argc, char *argv[])
2511 {
2512         extern char *optarg;
2513         extern int optind;
2514         int arg;
2515         int err;
2516         int foreground = 0;
2517         struct config *conf;
2518
2519         logsink = 1;
2520
2521         if (getuid() != 0) {
2522                 fprintf(stderr, "need to be root\n");
2523                 exit(1);
2524         }
2525
2526         /* make sure we don't lock any path */
2527         if (chdir("/") < 0)
2528                 fprintf(stderr, "can't chdir to root directory : %s\n",
2529                         strerror(errno));
2530         umask(umask(077) | 022);
2531
2532         udev = udev_new();
2533
2534         while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2535                 switch(arg) {
2536                 case 'd':
2537                         foreground = 1;
2538                         if (logsink > 0)
2539                                 logsink = 0;
2540                         //debug=1; /* ### comment me out ### */
2541                         break;
2542                 case 'v':
2543                         if (sizeof(optarg) > sizeof(char *) ||
2544                             !isdigit(optarg[0]))
2545                                 exit(1);
2546
2547                         verbosity = atoi(optarg);
2548                         break;
2549                 case 's':
2550                         logsink = -1;
2551                         break;
2552                 case 'k':
2553                         conf = load_config(DEFAULT_CONFIGFILE);
2554                         if (!conf)
2555                                 exit(1);
2556                         if (verbosity)
2557                                 conf->verbosity = verbosity;
2558                         uxclnt(optarg, uxsock_timeout + 100);
2559                         exit(0);
2560                 case 'B':
2561                         bindings_read_only = 1;
2562                         break;
2563                 case 'n':
2564                         ignore_new_devs = 1;
2565                         break;
2566                 default:
2567                         fprintf(stderr, "Invalid argument '-%c'\n",
2568                                 optopt);
2569                         exit(1);
2570                 }
2571         }
2572         if (optind < argc) {
2573                 char cmd[CMDSIZE];
2574                 char * s = cmd;
2575                 char * c = s;
2576
2577                 conf = load_config(DEFAULT_CONFIGFILE);
2578                 if (!conf)
2579                         exit(1);
2580                 if (verbosity)
2581                         conf->verbosity = verbosity;
2582                 memset(cmd, 0x0, CMDSIZE);
2583                 while (optind < argc) {
2584                         if (strchr(argv[optind], ' '))
2585                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2586                         else
2587                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2588                         optind++;
2589                 }
2590                 c += snprintf(c, s + CMDSIZE - c, "\n");
2591                 uxclnt(s, uxsock_timeout + 100);
2592                 exit(0);
2593         }
2594
2595         if (foreground) {
2596                 if (!isatty(fileno(stdout)))
2597                         setbuf(stdout, NULL);
2598                 err = 0;
2599                 daemon_pid = getpid();
2600         } else
2601                 err = daemonize();
2602
2603         if (err < 0)
2604                 /* error */
2605                 exit(1);
2606         else if (err > 0)
2607                 /* parent dies */
2608                 exit(0);
2609         else
2610                 /* child lives */
2611                 return (child(NULL));
2612 }
2613
2614 void *  mpath_pr_event_handler_fn (void * pathp )
2615 {
2616         struct multipath * mpp;
2617         int i,j, ret, isFound;
2618         struct path * pp = (struct path *)pathp;
2619         unsigned char *keyp;
2620         uint64_t prkey;
2621         struct prout_param_descriptor *param;
2622         struct prin_resp *resp;
2623
2624         mpp = pp->mpp;
2625
2626         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2627         if (!resp){
2628                 condlog(0,"%s Alloc failed for prin response", pp->dev);
2629                 return NULL;
2630         }
2631
2632         ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2633         if (ret != MPATH_PR_SUCCESS )
2634         {
2635                 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2636                 goto out;
2637         }
2638
2639         condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2640                         resp->prin_descriptor.prin_readkeys.additional_length );
2641
2642         if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2643         {
2644                 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2645                 ret = MPATH_PR_SUCCESS;
2646                 goto out;
2647         }
2648         prkey = 0;
2649         keyp = (unsigned char *)mpp->reservation_key;
2650         for (j = 0; j < 8; ++j) {
2651                 if (j > 0)
2652                         prkey <<= 8;
2653                 prkey |= *keyp;
2654                 ++keyp;
2655         }
2656         condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ", prkey);
2657
2658         isFound =0;
2659         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2660         {
2661                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
2662                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2663                 if (!memcmp(mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2664                 {
2665                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
2666                         isFound =1;
2667                         break;
2668                 }
2669         }
2670         if (!isFound)
2671         {
2672                 condlog(0, "%s: Either device not registered or ", pp->dev);
2673                 condlog(0, "host is not authorised for registration. Skip path");
2674                 ret = MPATH_PR_OTHER;
2675                 goto out;
2676         }
2677
2678         param= malloc(sizeof(struct prout_param_descriptor));
2679         memset(param, 0 , sizeof(struct prout_param_descriptor));
2680
2681         for (j = 7; j >= 0; --j) {
2682                 param->sa_key[j] = (prkey & 0xff);
2683                 prkey >>= 8;
2684         }
2685         param->num_transportid = 0;
2686
2687         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
2688
2689         ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2690         if (ret != MPATH_PR_SUCCESS )
2691         {
2692                 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
2693         }
2694         mpp->prflag = 1;
2695
2696         free(param);
2697 out:
2698         free(resp);
2699         return NULL;
2700 }
2701
2702 int mpath_pr_event_handle(struct path *pp)
2703 {
2704         pthread_t thread;
2705         int rc;
2706         pthread_attr_t attr;
2707         struct multipath * mpp;
2708
2709         mpp = pp->mpp;
2710
2711         if (!mpp->reservation_key)
2712                 return -1;
2713
2714         pthread_attr_init(&attr);
2715         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2716
2717         rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
2718         if (rc) {
2719                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2720                 return -1;
2721         }
2722         pthread_attr_destroy(&attr);
2723         rc = pthread_join(thread, NULL);
2724         return 0;
2725 }