Fix error
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include "autoconfig.h"
8 #include <unistd.h>
9 #include <sys/stat.h>
10 #include <libdevmapper.h>
11 #include <sys/wait.h>
12 #include <sys/mman.h>
13 #include <sys/types.h>
14 #include <fcntl.h>
15 #include <errno.h>
16 #include <limits.h>
17 #include <linux/oom.h>
18 #include <libudev.h>
19 #include <urcu.h>
20 #include "fpin.h"
21 #ifdef USE_SYSTEMD
22 #include <systemd/sd-daemon.h>
23 #endif
24 #include <semaphore.h>
25 #include <time.h>
26 #include <stdbool.h>
27
28 /*
29  * libmultipath
30  */
31 #include "time-util.h"
32
33 /*
34  * libcheckers
35  */
36 #include "checkers.h"
37
38 /*
39  * libmultipath
40  */
41 #include "version.h"
42 #include "parser.h"
43 #include "vector.h"
44 #include "config.h"
45 #include "util.h"
46 #include "hwtable.h"
47 #include "defaults.h"
48 #include "structs.h"
49 #include "blacklist.h"
50 #include "structs_vec.h"
51 #include "dmparser.h"
52 #include "devmapper.h"
53 #include "sysfs.h"
54 #include "dict.h"
55 #include "discovery.h"
56 #include "debug.h"
57 #include "propsel.h"
58 #include "uevent.h"
59 #include "switchgroup.h"
60 #include "print.h"
61 #include "configure.h"
62 #include "prio.h"
63 #include "wwids.h"
64 #include "pgpolicies.h"
65 #include "log.h"
66 #include "uxsock.h"
67 #include "alias.h"
68
69 #include "mpath_cmd.h"
70 #include "mpath_persist.h"
71 #include "mpath_persist_int.h"
72
73 #include "prioritizers/alua_rtpg.h"
74
75 #include "main.h"
76 #include "pidfile.h"
77 #include "uxlsnr.h"
78 #include "uxclnt.h"
79 #include "cli.h"
80 #include "cli_handlers.h"
81 #include "lock.h"
82 #include "waiter.h"
83 #include "dmevents.h"
84 #include "io_err_stat.h"
85 #include "foreign.h"
86 #include "../third-party/valgrind/drd.h"
87 #include "init_unwinder.h"
88
/* maximum length of a CLI command line */
#define CMDSIZE 160
/* buffer size for sd_notify STATUS= messages */
#define MSG_SIZE 32

/* persistent-reservation event handling, defined later in this file */
int mpath_pr_event_handle(struct path *pp);
void * mpath_pr_event_handler_fn (void * );
94
/*
 * Log the checker state of path @pp at verbosity @lvl.  Only logs when
 * the path belongs to a map, a checker has been selected, and @lvl is
 * within the current verbosity; offline paths get a fixed message,
 * otherwise the checker's own message is printed (if non-empty).
 */
#define LOG_MSG(lvl, pp)					\
do {								\
	if (pp->mpp && checker_selected(&pp->checker) &&	\
	    lvl <= libmp_verbosity) {				\
		if (pp->offline)				\
			condlog(lvl, "%s: %s - path offline",	\
				pp->mpp->alias, pp->dev);	\
		else  {						\
			const char *__m =			\
				checker_message(&pp->checker);	\
								\
			if (strlen(__m))			      \
				condlog(lvl, "%s: %s - %s checker%s", \
					pp->mpp->alias,		      \
					pp->dev,		      \
					checker_name(&pp->checker),   \
					__m);			      \
		}						      \
	}							      \
} while(0)
115
/* argument bundle handed to the PR event handler thread */
struct mpath_event_param
{
	char * devname;
	struct multipath *mpp;
};
121
/* timeout for client commands on the unix socket */
int uxsock_timeout;
/* -v command-line verbosity level */
static int verbosity;
static int bindings_read_only;
int ignore_new_devs;
#ifdef NO_DMEVENTS_POLL
static int poll_dmevents = 0;
#else
/* use the central dm-event polling thread instead of per-map waiters */
static int poll_dmevents = 1;
#endif
/* Don't access this variable without holding config_lock */
static enum daemon_status running_state = DAEMON_INIT;
/* Don't access this variable without holding config_lock */
static bool __delayed_reconfig;
pid_t daemon_pid;
/* protects running_state and __delayed_reconfig (see comments above) */
static pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t config_cond;
/* worker threads of the daemon, and flags recording which were started */
static pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, dmevent_thr,
	fpin_thr, fpin_consumer_thr;
static bool check_thr_started, uevent_thr_started, uxlsnr_thr_started,
	uevq_thr_started, dmevent_thr_started, fpin_thr_started,
	fpin_consumer_thr_started;
/* fd of the pidfile, kept open (and locked) for the daemon's lifetime */
static int pid_fd = -1;
144
145 static inline enum daemon_status get_running_state(void)
146 {
147         enum daemon_status st;
148
149         pthread_mutex_lock(&config_lock);
150         st = running_state;
151         pthread_mutex_unlock(&config_lock);
152         return st;
153 }
154
155 int should_exit(void)
156 {
157         return get_running_state() == DAEMON_SHUTDOWN;
158 }
159
/*
 * global copy of vecs for use in sig handlers
 */
static struct vectors * gvecs;

/* current configuration; swapped via RCU on reconfigure */
struct config *multipath_conf;

/* Local variables */
/* flags set asynchronously by the signal handlers */
static volatile sig_atomic_t exit_sig;
static volatile sig_atomic_t reconfig_sig;
static volatile sig_atomic_t log_reset_sig;

/* human-readable names for enum daemon_status, indexed by state */
static const char *daemon_status_msg[DAEMON_STATUS_SIZE] = {
	[DAEMON_INIT] = "init",
	[DAEMON_START] = "startup",
	[DAEMON_CONFIGURE] = "configure",
	[DAEMON_IDLE] = "idle",
	[DAEMON_RUNNING] = "running",
	[DAEMON_SHUTDOWN] = "shutdown",
};
180
181 const char *
182 daemon_status(void)
183 {
184         int status = get_running_state();
185
186         if (status < DAEMON_INIT || status >= DAEMON_STATUS_SIZE)
187                 return NULL;
188
189         return daemon_status_msg[status];
190 }
191
192 /*
193  * I love you too, systemd ...
194  */
#ifdef USE_SYSTEMD
/*
 * Translate daemon state transitions into sd_notify() messages so systemd
 * can track STATUS/READY/RELOADING/STOPPING for the unit.
 */
static void do_sd_notify(enum daemon_status old_state,
			 enum daemon_status new_state)
{
	char notify_msg[MSG_SIZE];
	const char *msg;
	/* set once the first CONFIGURE -> IDLE transition has been reported */
	static bool startup_done = false;

	/*
	 * Checkerloop switches back and forth between idle and running state.
	 * No need to tell systemd each time.
	 * These notifications cause a lot of overhead on dbus.
	 */
	if ((new_state == DAEMON_IDLE || new_state == DAEMON_RUNNING) &&
	    (old_state == DAEMON_IDLE || old_state == DAEMON_RUNNING))
		return;

	if (new_state == DAEMON_IDLE || new_state == DAEMON_RUNNING)
		msg = "up";
	else
		msg = daemon_status_msg[new_state];

	if (msg && !safe_sprintf(notify_msg, "STATUS=%s", msg))
		sd_notify(0, notify_msg);

	if (new_state == DAEMON_SHUTDOWN) {
		/* Tell systemd that we're not RELOADING any more */
		if (old_state == DAEMON_CONFIGURE && startup_done)
			sd_notify(0, "READY=1");
		sd_notify(0, "STOPPING=1");
	} else if (new_state == DAEMON_IDLE && old_state == DAEMON_CONFIGURE) {
		sd_notify(0, "READY=1");
		startup_done = true;
	} else if (new_state == DAEMON_CONFIGURE && startup_done)
		sd_notify(0, "RELOADING=1");
}
#else
/* no-op stub when multipathd is built without systemd support */
static void do_sd_notify(__attribute__((unused)) enum daemon_status old_state,
			 __attribute__((unused)) enum daemon_status new_state)
{}
#endif
236
/* pthread cleanup handler: drop config_lock on cancellation or pop(1) */
static void config_cleanup(__attribute__((unused)) void *arg)
{
	pthread_mutex_unlock(&config_lock);
}
241
/*
 * Wait on config_cond (config_lock must be held) for at most @ms
 * milliseconds while @condition remains true.  @condition is
 * re-evaluated after every wakeup.  Evaluates to 0 if the condition
 * became false, otherwise the pthread_cond_timedwait() error code
 * (typically ETIMEDOUT).
 */
#define __wait_for_state_change(condition, ms)				\
	({								\
		struct timespec tmo;					\
		int rc = 0;						\
									\
		if (condition) {					\
			get_monotonic_time(&tmo);			\
			tmo.tv_nsec += (ms) * 1000 * 1000;		\
			normalize_timespec(&tmo);			\
			do						\
				rc = pthread_cond_timedwait(		\
					&config_cond, &config_lock, &tmo); \
			while (rc == 0 && (condition));			\
		}							\
		rc;							\
	})
258
/*
 * If the current status is @oldstate, wait for at most @ms milliseconds
 * for the state to change, and return the new state, which may still be
 * @oldstate.
 */
enum daemon_status wait_for_state_change_if(enum daemon_status oldstate,
					    unsigned long ms)
{
	enum daemon_status st;

	/* SHUTDOWN is final: never wait for it to change */
	if (oldstate == DAEMON_SHUTDOWN)
		return DAEMON_SHUTDOWN;

	pthread_mutex_lock(&config_lock);
	pthread_cleanup_push(config_cleanup, NULL);
	__wait_for_state_change(running_state == oldstate, ms);
	st = running_state;
	/* pop(1) runs config_cleanup(), releasing config_lock */
	pthread_cleanup_pop(1);
	return st;
}
279
/* Don't access this variable without holding config_lock */
/* strength of a reconfigure requested via schedule_reconfigure() */
static enum force_reload_types reconfigure_pending = FORCE_RELOAD_NONE;
282
/* must be called with config_lock held */
/*
 * Transition the state machine to @state unless we're already there or
 * shutting down; wake all waiters on config_cond and notify systemd.
 */
static void __post_config_state(enum daemon_status state)
{
	if (state != running_state && running_state != DAEMON_SHUTDOWN) {
		enum daemon_status old_state = running_state;

		running_state = state;
		pthread_cond_broadcast(&config_cond);
		do_sd_notify(old_state, state);
		condlog(4, "daemon state %s -> %s",
			daemon_status_msg[old_state], daemon_status_msg[state]);
	}
}
296
/* Locked wrapper around __post_config_state(). */
void post_config_state(enum daemon_status state)
{
	pthread_mutex_lock(&config_lock);
	pthread_cleanup_push(config_cleanup, NULL);
	__post_config_state(state);
	pthread_cleanup_pop(1);
}
304
/*
 * Clear a delayed reconfigure; if the daemon is currently idle, kick it
 * straight into DAEMON_CONFIGURE.  Returns true if a reconfigure had
 * actually been delayed.
 */
static bool unblock_reconfigure(void)
{
	bool was_delayed;

	pthread_mutex_lock(&config_lock);
	was_delayed = __delayed_reconfig;
	if (was_delayed) {
		__delayed_reconfig = false;
		/*
		 * In IDLE state, make sure child() is woken up
		 * Otherwise it will wake up when state switches to IDLE
		 */
		if (running_state == DAEMON_IDLE)
			__post_config_state(DAEMON_CONFIGURE);
	}
	pthread_mutex_unlock(&config_lock);
	/* log outside the lock to keep the critical section short */
	if (was_delayed)
		condlog(3, "unblocked delayed reconfigure");
	return was_delayed;
}
325
326 /*
327  * Make sure child() is woken up when a map is removed that multipathd
328  * is currently waiting for.
329  * Overrides libmultipath's weak symbol by the same name
330  */
331 void remove_map_callback(struct multipath *mpp)
332 {
333         if (mpp->wait_for_udev > 0)
334                 unblock_reconfigure();
335 }
336
/*
 * Request a reconfigure of strength @requested_type.  An idle daemon is
 * switched to CONFIGURE immediately; while configuring/running the
 * request is recorded in reconfigure_pending; during shutdown it is
 * dropped.  A pending FORCE_RELOAD_YES is never downgraded.
 */
void schedule_reconfigure(enum force_reload_types requested_type)
{
	pthread_mutex_lock(&config_lock);
	pthread_cleanup_push(config_cleanup, NULL);
	enum force_reload_types type;

	type = (reconfigure_pending == FORCE_RELOAD_YES ||
		requested_type == FORCE_RELOAD_YES) ?
	       FORCE_RELOAD_YES : FORCE_RELOAD_WEAK;
	switch (running_state)
	{
	case DAEMON_SHUTDOWN:
		break;
	case DAEMON_IDLE:
		reconfigure_pending = type;
		__post_config_state(DAEMON_CONFIGURE);
		break;
	case DAEMON_CONFIGURE:
	case DAEMON_RUNNING:
		reconfigure_pending = type;
		break;
	default:
		break;
	}
	pthread_cleanup_pop(1);
}
363
/*
 * Wait until the daemon reaches @state, DAEMON_IDLE or DAEMON_SHUTDOWN;
 * when it is IDLE (and @state isn't), post @state.  Returns the
 * resulting running state — callers must handle DAEMON_SHUTDOWN.
 */
static enum daemon_status set_config_state(enum daemon_status state)
{
	int rc = 0;
	enum daemon_status st;

	pthread_cleanup_push(config_cleanup, NULL);
	pthread_mutex_lock(&config_lock);

	while (rc == 0 &&
	       running_state != state &&
	       running_state != DAEMON_SHUTDOWN &&
	       running_state != DAEMON_IDLE) {
		rc = pthread_cond_wait(&config_cond, &config_lock);
	}

	if (rc == 0 && running_state == DAEMON_IDLE && state != DAEMON_IDLE)
		__post_config_state(state);
	st = running_state;

	pthread_cleanup_pop(1);
	return st;
}
386
/*
 * Take an RCU read-side reference on the current config.  Must be
 * paired with put_multipath_config(); the returned pointer must not be
 * used after that call.
 */
struct config *get_multipath_config(void)
{
	rcu_read_lock();
	return rcu_dereference(multipath_conf);
}
392
/* Release the RCU read lock taken by get_multipath_config(). */
void put_multipath_config(__attribute__((unused)) void *arg)
{
	rcu_read_unlock();
}
397
398 /*
399  * The path group orderings that this function finds acceptable are different
400  * from now select_path_group determines the best pathgroup. The idea here is
401  * to only trigger a kernel reload when it is obvious that the pathgroups would
402  * be out of order, even if all the paths were usable. Thus pathgroups with
403  * PRIO_UNDEF are skipped, and the number of enabled paths doesn't matter here.
404  */
405 bool path_groups_in_order(struct multipath *mpp)
406 {
407         int i;
408         struct pathgroup *pgp;
409         bool seen_marginal_pg = false;
410         int last_prio = INT_MAX;
411
412         if (VECTOR_SIZE(mpp->pg) < 2)
413                 return true;
414
415         vector_foreach_slot(mpp->pg, pgp, i) {
416                 if (seen_marginal_pg && !pgp->marginal)
417                         return false;
418                 /* skip pgs with PRIO_UNDEF, since this is likely temporary */
419                 if (!pgp->paths || pgp->priority == PRIO_UNDEF)
420                         continue;
421                 if (pgp->marginal && !seen_marginal_pg) {
422                         seen_marginal_pg = true;
423                         last_prio = pgp->priority;
424                         continue;
425                 }
426                 if (pgp->priority > last_prio)
427                         return false;
428                 last_prio = pgp->priority;
429         }
430         return true;
431 }
432
/*
 * Decide whether @mpp should switch to another path group.  Sets
 * *need_reload when the groups are out of order, i.e. a full kernel
 * table reload (not just a group switch) is required.  Returns nonzero
 * if a switch or reload is needed.
 *
 * NOTE(review): select_path_group() runs before the manual-failback
 * check, presumably for its side effects on group priorities — confirm.
 */
static int
need_switch_pathgroup (struct multipath * mpp, bool *need_reload)
{
	int bestpg;

	*need_reload = false;
	if (!mpp)
		return 0;

	if (VECTOR_SIZE(mpp->pg) < 2)
		return 0;

	bestpg = select_path_group(mpp);
	if (mpp->pgfailback == -FAILBACK_MANUAL)
		return 0;

	mpp->bestpg = bestpg;
	*need_reload = !path_groups_in_order(mpp);

	return (*need_reload || mpp->bestpg != mpp->nextpg);
}
454
455 static void
456 switch_pathgroup (struct multipath * mpp)
457 {
458         mpp->stat_switchgroup++;
459         dm_switchgroup(mpp->alias, mpp->bestpg);
460         condlog(2, "%s: switch to path group #%i",
461                  mpp->alias, mpp->bestpg);
462 }
463
464 static int
465 wait_for_events(struct multipath *mpp, struct vectors *vecs)
466 {
467         if (poll_dmevents)
468                 return watch_dmevents(mpp->alias);
469         else
470                 return start_waiter_thread(mpp, vecs);
471 }
472
/*
 * Forget @mpp: stop its waiter thread (when per-map waiters are in use)
 * and drop it from the internal path and map vectors.
 */
static void
remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs)
{
	/* devices are automatically removed by the dmevent polling code,
	 * so they don't need to be manually removed here */
	condlog(3, "%s: removing map from internal tables", mpp->alias);
	if (!poll_dmevents)
		stop_waiter_thread(mpp);
	remove_map(mpp, vecs->pathvec, vecs->mpvec);
}
483
484 static void
485 remove_maps_and_stop_waiters(struct vectors *vecs)
486 {
487         int i;
488         struct multipath * mpp;
489
490         if (!vecs)
491                 return;
492
493         if (!poll_dmevents) {
494                 vector_foreach_slot(vecs->mpvec, mpp, i)
495                         stop_waiter_thread(mpp);
496         }
497         else
498                 unwatch_all_dmevents();
499
500         remove_maps(vecs);
501 }
502
/*
 * Refresh dm info and table strings for @mpp.  With @reset set,
 * re-evaluate no_path_retry and cancel a pending deferred remove when
 * paths are present again.
 *
 * Returns 0 on success.  On failure the map is removed from the
 * internal tables (mpp is invalid afterwards) and 1 is returned.
 */
int __setup_multipath(struct vectors *vecs, struct multipath *mpp,
		      int reset)
{
	if (dm_get_info(mpp->alias, &mpp->dmi)) {
		/* Error accessing table */
		condlog(2, "%s: cannot access table", mpp->alias);
		goto out;
	}

	if (update_multipath_strings(mpp, vecs->pathvec) != DMP_OK) {
		condlog(0, "%s: failed to setup multipath", mpp->alias);
		goto out;
	}

	if (reset) {
		set_no_path_retry(mpp);
		if (VECTOR_SIZE(mpp->paths) != 0)
			dm_cancel_deferred_remove(mpp);
	}

	return 0;
out:
	remove_map_and_stop_waiter(mpp, vecs);
	return 1;
}
528
/*
 * Resync multipathd's view of map @mapname with the kernel: refresh dm
 * state and mark paths the kernel has failed (dmstate PSTATE_FAILED) as
 * PATH_DOWN in the checker state, scheduling an earlier recheck.
 *
 * Returns 0 on success, 1 if the map had to be removed, 2 if unknown.
 */
int update_multipath (struct vectors *vecs, char *mapname, int reset)
{
	struct multipath *mpp;
	struct pathgroup  *pgp;
	struct path *pp;
	int i, j;

	mpp = find_mp_by_alias(vecs->mpvec, mapname);

	if (!mpp) {
		condlog(3, "%s: multipath map not found", mapname);
		return 2;
	}

	if (__setup_multipath(vecs, mpp, reset))
		return 1; /* mpp freed in __setup_multipath */

	/*
	 * compare checkers states with DM states
	 */
	vector_foreach_slot (mpp->pg, pgp, i) {
		vector_foreach_slot (pgp->paths, pp, j) {
			if (pp->dmstate != PSTATE_FAILED)
				continue;

			if (pp->state != PATH_DOWN) {
				struct config *conf;
				int oldstate = pp->state;
				unsigned int checkint;

				conf = get_multipath_config();
				checkint = conf->checkint;
				put_multipath_config(conf);
				condlog(2, "%s: mark as failed", pp->dev);
				mpp->stat_path_failures++;
				pp->state = PATH_DOWN;
				/* path leaves the usable set: update queueing */
				if (oldstate == PATH_UP ||
				    oldstate == PATH_GHOST)
					update_queue_mode_del_path(mpp);

				/*
				 * if opportune,
				 * schedule the next check earlier
				 */
				if (pp->tick > checkint)
					pp->tick = checkint;
			}
		}
	}
	return 0;
}
580
/*
 * Called when the last path of @mpp is gone.  Optionally disable
 * queueing (flush_on_last_del) and try to flush the map.  Returns true
 * if the map was removed (mpp is invalid afterwards), false otherwise.
 */
static bool
flush_map_nopaths(struct multipath *mpp, struct vectors *vecs) {
	char alias[WWID_SIZE];

	/*
	 * flush_map will fail if the device is open
	 */
	/* keep a copy of the alias: flush_map() frees mpp on success */
	strlcpy(alias, mpp->alias, WWID_SIZE);
	if (mpp->flush_on_last_del == FLUSH_ENABLED) {
		condlog(2, "%s Last path deleted, disabling queueing",
			mpp->alias);
		mpp->retry_tick = 0;
		mpp->no_path_retry = NO_PATH_RETRY_FAIL;
		mpp->disable_queueing = 1;
		mpp->stat_map_failures++;
		dm_queue_if_no_path(mpp->alias, 0);
	}
	if (!flush_map(mpp, vecs, 1)) {
		condlog(2, "%s: removed map after removing all paths", alias);
		return true;
	}
	return false;
}
604
605 static void
606 pr_register_active_paths(struct multipath *mpp)
607 {
608         unsigned int i, j;
609         struct path *pp;
610         struct pathgroup *pgp;
611
612         vector_foreach_slot (mpp->pg, pgp, i) {
613                 vector_foreach_slot (pgp->paths, pp, j) {
614                         if ((pp->state == PATH_UP) || (pp->state == PATH_GHOST))
615                                 mpath_pr_event_handle(pp);
616                 }
617         }
618 }
619
620 static int
621 update_map (struct multipath *mpp, struct vectors *vecs, int new_map)
622 {
623         int retries = 3;
624         char *params __attribute__((cleanup(cleanup_charp))) = NULL;
625
626 retry:
627         condlog(4, "%s: updating new map", mpp->alias);
628         if (adopt_paths(vecs->pathvec, mpp)) {
629                 condlog(0, "%s: failed to adopt paths for new map update",
630                         mpp->alias);
631                 retries = -1;
632                 goto fail;
633         }
634         verify_paths(mpp);
635         if (VECTOR_SIZE(mpp->paths) == 0 &&
636             flush_map_nopaths(mpp, vecs))
637                 return 1;
638
639         mpp->action = ACT_RELOAD;
640
641         if (setup_map(mpp, &params, vecs)) {
642                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
643                 retries = -1;
644                 goto fail;
645         }
646         if (domap(mpp, params, 1) == DOMAP_FAIL && retries-- > 0) {
647                 condlog(0, "%s: map_udate sleep", mpp->alias);
648                 free(params);
649                 params = NULL;
650                 sleep(1);
651                 goto retry;
652         }
653
654 fail:
655         if (new_map && (retries < 0 || wait_for_events(mpp, vecs))) {
656                 condlog(0, "%s: failed to create new map", mpp->alias);
657                 remove_map(mpp, vecs->pathvec, vecs->mpvec);
658                 return 1;
659         }
660
661         if (setup_multipath(vecs, mpp))
662                 return 1;
663
664         sync_map_state(mpp);
665
666         if (mpp->prflag != PRFLAG_SET)
667                 update_map_pr(mpp);
668         if (mpp->prflag == PRFLAG_SET)
669                 pr_register_active_paths(mpp);
670
671         if (retries < 0)
672                 condlog(0, "%s: failed reload in new map update", mpp->alias);
673         return 0;
674 }
675
676 static struct multipath *
677 add_map_without_path (struct vectors *vecs, const char *alias)
678 {
679         struct multipath * mpp = alloc_multipath();
680         struct config *conf;
681
682         if (!mpp)
683                 return NULL;
684         if (!alias) {
685                 free(mpp);
686                 return NULL;
687         }
688
689         mpp->alias = strdup(alias);
690
691         if (dm_get_info(mpp->alias, &mpp->dmi)) {
692                 condlog(3, "%s: cannot access table", mpp->alias);
693                 goto out;
694         }
695         if (!strlen(mpp->wwid))
696                 dm_get_uuid(mpp->alias, mpp->wwid, WWID_SIZE);
697         if (!strlen(mpp->wwid))
698                 condlog(1, "%s: adding map with empty WWID", mpp->alias);
699         conf = get_multipath_config();
700         mpp->mpe = find_mpe(conf->mptable, mpp->wwid);
701         put_multipath_config(conf);
702
703         if (update_multipath_table(mpp, vecs->pathvec, 0) != DMP_OK)
704                 goto out;
705
706         if (!vector_alloc_slot(vecs->mpvec))
707                 goto out;
708
709         vector_set_slot(vecs->mpvec, mpp);
710
711         if (update_map(mpp, vecs, 1) != 0) /* map removed */
712                 return NULL;
713
714         return mpp;
715 out:
716         remove_map(mpp, vecs->pathvec, vecs->mpvec);
717         return NULL;
718 }
719
/*
 * Drop maps from vecs->mpvec that have no counterpart (by WWID) in the
 * newly generated vector @nmpv; maps that cannot be flushed (e.g. still
 * open) are moved into @nmpv instead.  Returns 1 on allocation failure,
 * 0 otherwise.
 */
static int
coalesce_maps(struct vectors *vecs, vector nmpv)
{
	struct multipath * ompp;
	vector ompv = vecs->mpvec;
	unsigned int i, reassign_maps;
	struct config *conf;

	conf = get_multipath_config();
	reassign_maps = conf->reassign_maps;
	put_multipath_config(conf);
	vector_foreach_slot (ompv, ompp, i) {
		condlog(3, "%s: coalesce map", ompp->alias);
		if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
			/*
			 * remove all current maps not allowed by the
			 * current configuration
			 */
			if (dm_flush_map(ompp->alias)) {
				condlog(0, "%s: unable to flush devmap",
					ompp->alias);
				/*
				 * may be just because the device is open
				 */
				/* on failure the map was presumably removed
				 * from ompv (see __setup_multipath), hence
				 * the index fixup — TODO confirm */
				if (setup_multipath(vecs, ompp) != 0) {
					i--;
					continue;
				}
				if (!vector_alloc_slot(nmpv))
					return 1;

				vector_set_slot(nmpv, ompp);

				/* keep the still-open map in the new vector */
				vector_del_slot(ompv, i);
				i--;
			}
			else
				condlog(2, "%s devmap removed", ompp->alias);
		} else if (reassign_maps) {
			condlog(3, "%s: Reassign existing device-mapper"
				" devices", ompp->alias);
			dm_reassign(ompp->alias);
		}
	}
	return 0;
}
766
767 static void
768 sync_maps_state(vector mpvec)
769 {
770         unsigned int i;
771         struct multipath *mpp;
772
773         vector_foreach_slot (mpvec, mpp, i)
774                 sync_map_state(mpp);
775 }
776
/*
 * Flush map @mpp from the kernel (with @nopaths, even while paths are
 * configured) and, on success, drop it from the internal tables.
 * Returns 0 when flushed, 1 when the flush failed (e.g. device open),
 * or another nonzero value when a deferred remove was scheduled.
 */
int
flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
{
	int r;

	if (nopaths)
		r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
	else
		r = dm_flush_map(mpp->alias);
	/*
	 * NOTE(review): this comment is stale — the flush already happened
	 * above.  Removing the map from the internal tables below clears
	 * the references so the spurious uevent generated by the flush
	 * is ignored.
	 */
	if (r) {
		if (r == 1)
			condlog(0, "%s: can't flush", mpp->alias);
		else {
			condlog(2, "%s: devmap deferred remove", mpp->alias);
			mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
		}
		return r;
	}
	else
		condlog(2, "%s: map flushed", mpp->alias);

	remove_map_and_stop_waiter(mpp, vecs);

	return 0;
}
806
/*
 * Handle a dm "add" uevent: resolve the map name (DM_NAME, or looked up
 * via major:minor) and hand off to ev_add_map() under vecs->lock.
 */
static int
uev_add_map (struct uevent * uev, struct vectors * vecs)
{
	char *alias;
	int major = -1, minor = -1, rc;

	condlog(3, "%s: add map (uevent)", uev->kernel);
	alias = uevent_get_dm_name(uev);
	if (!alias) {
		condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
		major = uevent_get_major(uev);
		minor = uevent_get_minor(uev);
		alias = dm_mapname(major, minor);
		if (!alias) {
			condlog(2, "%s: mapname not found for %d:%d",
				uev->kernel, major, minor);
			return 1;
		}
	}
	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	/* honor a pending cancellation before doing real work */
	pthread_testcancel();
	rc = ev_add_map(uev->kernel, alias, vecs);
	lock_cleanup_pop(vecs->lock);
	free(alias);
	return rc;
}
834
/*
 * ev_add_map expects that the multipath device already exists in kernel
 * before it is called. It just adds a device to multipathd or updates an
 * existing device.
 *
 * Returns 0 on success (or when there is nothing to do), 1 on failure.
 */
int
ev_add_map (char * dev, const char * alias, struct vectors * vecs)
{
	struct multipath * mpp;
	int reassign_maps;
	struct config *conf;

	if (dm_is_mpath(alias) != 1) {
		condlog(4, "%s: not a multipath map", alias);
		return 0;
	}

	mpp = find_mp_by_alias(vecs->mpvec, alias);

	if (mpp) {
		/* known map: run delayed actions and clear udev waits */
		if (mpp->wait_for_udev > 1) {
			condlog(2, "%s: performing delayed actions",
				mpp->alias);
			if (update_map(mpp, vecs, 0))
				/* setup multipathd removed the map */
				return 1;
		}
		conf = get_multipath_config();
		reassign_maps = conf->reassign_maps;
		put_multipath_config(conf);
		dm_get_info(mpp->alias, &mpp->dmi);
		if (mpp->wait_for_udev) {
			mpp->wait_for_udev = 0;
			if (!need_to_delay_reconfig(vecs) &&
			    unblock_reconfigure())
				return 0;
		}
		/*
		 * Not really an error -- we generate our own uevent
		 * if we create a multipath mapped device as a result
		 * of uev_add_path
		 */
		if (reassign_maps) {
			condlog(3, "%s: Reassign existing device-mapper devices",
				alias);
			dm_reassign(alias);
		}
		return 0;
	}
	condlog(2, "%s: adding map", alias);

	/*
	 * now we can register the map
	 */
	if ((mpp = add_map_without_path(vecs, alias))) {
		sync_map_state(mpp);
		condlog(2, "%s: devmap %s registered", alias, dev);
		return 0;
	} else {
		condlog(2, "%s: ev_add_map failed", dev);
		return 1;
	}
}
898
/*
 * Handle a dm "remove" uevent: look the map up by minor number, verify
 * the alias matches, and drop it from the internal tables.  Always
 * returns 0 — a missing or mismatched map is not an error here.
 */
static int
uev_remove_map (struct uevent * uev, struct vectors * vecs)
{
	char *alias;
	int minor;
	struct multipath *mpp;

	condlog(3, "%s: remove map (uevent)", uev->kernel);
	alias = uevent_get_dm_name(uev);
	if (!alias) {
		condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
		return 0;
	}
	minor = uevent_get_minor(uev);

	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();
	mpp = find_mp_by_minor(vecs->mpvec, minor);

	if (!mpp) {
		condlog(2, "%s: devmap not registered, can't remove",
			uev->kernel);
		goto out;
	}
	if (strcmp(mpp->alias, alias)) {
		condlog(2, "%s: map alias mismatch: have \"%s\", got \"%s\")",
			uev->kernel, mpp->alias, alias);
		goto out;
	}

	/* stop queueing before dropping the map from the tables */
	dm_queue_if_no_path(alias, 0);
	remove_map_and_stop_waiter(mpp, vecs);
out:
	lock_cleanup_pop(vecs->lock);
	free(alias);
	return 0;
}
937
938 /* Called from CLI handler */
939 int
940 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
941 {
942         struct multipath * mpp;
943
944         mpp = find_mp_by_minor(vecs->mpvec, minor);
945
946         if (!mpp) {
947                 condlog(2, "%s: devmap not registered, can't remove",
948                         devname);
949                 return 1;
950         }
951         if (strcmp(mpp->alias, alias)) {
952                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
953                         mpp->alias, mpp->dmi.minor, minor);
954                 return 1;
955         }
956         return flush_map(mpp, vecs, 0);
957 }
958
/*
 * Trigger a SCSI rescan of the device underlying @ud by writing "1"
 * to the parent scsi_device's "rescan" sysfs attribute.
 * No-op for non-SCSI devices (no matching parent is found).
 */
static void
rescan_path(struct udev_device *ud)
{
	static const char one[] = "1";
	ssize_t written;

	ud = udev_device_get_parent_with_subsystem_devtype(ud, "scsi",
							   "scsi_device");
	if (!ud)
		return;

	written = sysfs_attr_set_value(ud, "rescan", one, sizeof(one) - 1);
	if (written != sizeof(one) - 1)
		log_sysfs_attr_set_value(1, written,
					 "%s: failed to trigger rescan",
					 udev_device_get_syspath(ud));
}
973
974 void
975 handle_path_wwid_change(struct path *pp, struct vectors *vecs)
976 {
977         struct udev_device *udd;
978         static const char add[] = "add";
979         ssize_t ret;
980         char dev[FILE_NAME_SIZE];
981
982         if (!pp || !pp->udev)
983                 return;
984
985         strlcpy(dev, pp->dev, sizeof(dev));
986         udd = udev_device_ref(pp->udev);
987         if (!(ev_remove_path(pp, vecs, 1) & REMOVE_PATH_SUCCESS) && pp->mpp) {
988                 pp->dmstate = PSTATE_FAILED;
989                 dm_fail_path(pp->mpp->alias, pp->dev_t);
990         }
991         rescan_path(udd);
992         ret = sysfs_attr_set_value(udd, "uevent", add, sizeof(add) - 1);
993         udev_device_unref(udd);
994         if (ret != sizeof(add) - 1)
995                 log_sysfs_attr_set_value(1, ret,
996                                          "%s: failed to trigger add event", dev);
997 }
998
999 bool
1000 check_path_wwid_change(struct path *pp)
1001 {
1002         char wwid[WWID_SIZE];
1003         int len = 0;
1004         size_t i;
1005
1006         if (!strlen(pp->wwid))
1007                 return false;
1008
1009         /* Get the real fresh device wwid by sgio. sysfs still has old
1010          * data, so only get_vpd_sgio will work to get the new wwid */
1011         len = get_vpd_sgio(pp->fd, 0x83, 0, wwid, WWID_SIZE);
1012
1013         if (len <= 0) {
1014                 condlog(2, "%s: failed to check wwid by sgio: len = %d",
1015                         pp->dev, len);
1016                 return false;
1017         }
1018
1019         /*Strip any trailing blanks */
1020         for (i = strlen(pp->wwid); i > 0 && pp->wwid[i-1] == ' '; i--);
1021                 /* no-op */
1022         pp->wwid[i] = '\0';
1023         condlog(4, "%s: Got wwid %s by sgio", pp->dev, wwid);
1024
1025         if (strncmp(wwid, pp->wwid, WWID_SIZE)) {
1026                 condlog(0, "%s: wwid '%s' doesn't match wwid '%s' from device",
1027                         pp->dev, pp->wwid, wwid);
1028                 return true;
1029         }
1030
1031         return false;
1032 }
1033
1034 /*
1035  * uev_add_path can call uev_update_path, and uev_update_path can call
1036  * uev_add_path
1037  */
1038 static int uev_update_path (struct uevent *uev, struct vectors * vecs);
1039
/*
 * Handle an "add" uevent for a path device.
 * If the path is already known it may be re-added (INIT_REMOVED),
 * completed later (INIT_PARTIAL) or reinitialized; otherwise it is
 * discovered, stored in pathvec, and added to a map via ev_add_path().
 * Returns 0 on success (including intentionally ignored events), 1 on error.
 */
static int
uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
{
	struct path *pp;
	int ret = 0, i;
	struct config *conf;
	bool partial_init = false;

	condlog(3, "%s: add path (uevent)", uev->kernel);
	if (strstr(uev->kernel, "..") != NULL) {
		/*
		 * Don't allow relative device names in the pathvec
		 */
		condlog(0, "%s: path name is invalid", uev->kernel);
		return 1;
	}

	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();
	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
	if (pp) {
		int r;
		struct multipath *prev_mpp = NULL;

		if (pp->initialized == INIT_PARTIAL) {
			/* finish initialization via uev_update_path() below,
			 * after the lock has been dropped */
			partial_init = true;
			goto out;
		} else if (pp->initialized == INIT_REMOVED) {
			condlog(3, "%s: re-adding removed path", pp->dev);
			pp->initialized = INIT_NEW;
			/* remember the map the path was (wrongly) left in */
			prev_mpp = pp->mpp;
			if (prev_mpp == NULL)
				condlog(0, "Bug: %s was in INIT_REMOVED state without being a multipath member",
					pp->dev);
			pp->mpp = NULL;
			/* make sure get_uid() is called */
			pp->wwid[0] = '\0';
		} else
			condlog(3,
				"%s: spurious uevent, path already in pathvec",
				uev->kernel);

		if (!pp->mpp && !strlen(pp->wwid)) {
			condlog(3, "%s: reinitialize path", uev->kernel);
			/* swap in the udev device from the new event */
			udev_device_unref(pp->udev);
			pp->udev = udev_device_ref(uev->udev);
			conf = get_multipath_config();
			pthread_cleanup_push(put_multipath_config, conf);
			r = pathinfo(pp, conf,
				     DI_ALL | DI_BLACKLIST);
			pthread_cleanup_pop(1);
			if (r == PATHINFO_OK && !prev_mpp)
				ret = ev_add_path(pp, vecs, need_do_map);
			else if (r == PATHINFO_OK &&
				 !strncmp(pp->wwid, prev_mpp->wwid, WWID_SIZE)) {
				/*
				 * Path was unsuccessfully removed, but now
				 * re-added, and still belongs to the right map
				 * - all fine, reinstate asap
				 */
				pp->mpp = prev_mpp;
				pp->tick = 1;
				ret = 0;
			} else if (prev_mpp) {
				/*
				 * Bad: re-added path still hangs in wrong map
				 * Make another attempt to remove the path
				 */
				pp->mpp = prev_mpp;
				if (!(ev_remove_path(pp, vecs, true) &
				      REMOVE_PATH_SUCCESS)) {
					/*
					 * Failure in ev_remove_path will keep
					 * path in pathvec in INIT_REMOVED state
					 * Fail the path to make sure it isn't
					 * used any more.
					 */
					pp->dmstate = PSTATE_FAILED;
					dm_fail_path(pp->mpp->alias, pp->dev_t);
					condlog(1, "%s: failed to re-add path still mapped in %s",
						pp->dev, pp->mpp->alias);
					ret = 1;
				} else if (r == PATHINFO_OK)
					/*
					 * Path successfully freed, move on to
					 * "new path" code path below
					 */
					pp = NULL;
			} else if (r == PATHINFO_SKIPPED) {
				/* path is blacklisted now; drop it for good */
				condlog(3, "%s: remove blacklisted path",
					uev->kernel);
				i = find_slot(vecs->pathvec, (void *)pp);
				if (i != -1)
					vector_del_slot(vecs->pathvec, i);
				free_path(pp);
			} else {
				condlog(0, "%s: failed to reinitialize path",
					uev->kernel);
				ret = 1;
			}
		}
	}
	/* if pp is still set, the existing-path cases above handled the event */
	if (pp)
		goto out;

	/*
	 * get path vital state
	 */
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	ret = alloc_path_with_pathinfo(conf, uev->udev,
				       uev->wwid, DI_ALL, &pp);
	pthread_cleanup_pop(1);
	if (!pp) {
		if (ret == PATHINFO_SKIPPED)
			/* blacklisted: not an error */
			ret = 0;
		else {
			condlog(3, "%s: failed to get path info", uev->kernel);
			ret = 1;
		}
		goto out;
	}
	ret = store_path(vecs->pathvec, pp);
	if (!ret) {
		conf = get_multipath_config();
		pp->checkint = conf->checkint;
		put_multipath_config(conf);
		ret = ev_add_path(pp, vecs, need_do_map);
	} else {
		condlog(0, "%s: failed to store path info, "
			"dropping event",
			uev->kernel);
		free_path(pp);
		ret = 1;
	}
out:
	lock_cleanup_pop(vecs->lock);
	if (partial_init)
		/* called after unlocking: uev_update_path takes the lock itself */
		return uev_update_path(uev, vecs);
	return ret;
}
1182
1183 static int
1184 sysfs_get_ro (struct path *pp)
1185 {
1186         int ro;
1187         char buff[3]; /* Either "0\n\0" or "1\n\0" */
1188
1189         if (!pp->udev)
1190                 return -1;
1191
1192         if (!sysfs_attr_get_value_ok(pp->udev, "ro", buff, sizeof(buff))) {
1193                 condlog(3, "%s: Cannot read ro attribute in sysfs", pp->dev);
1194                 return -1;
1195         }
1196
1197         if (sscanf(buff, "%d\n", &ro) != 1 || ro < 0 || ro > 1) {
1198                 condlog(3, "%s: Cannot parse ro attribute", pp->dev);
1199                 return -1;
1200         }
1201
1202         return ro;
1203 }
1204
1205 /*
1206  * returns:
1207  * 0: added
1208  * 1: error
1209  */
1210 int
1211 ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map)
1212 {
1213         struct multipath * mpp;
1214         char *params __attribute((cleanup(cleanup_charp))) = NULL;
1215         int retries = 3;
1216         int start_waiter = 0;
1217         int ret;
1218         int ro;
1219         unsigned char prflag = PRFLAG_UNSET;
1220
1221         /*
1222          * need path UID to go any further
1223          */
1224         if (strlen(pp->wwid) == 0) {
1225                 condlog(0, "%s: failed to get path uid", pp->dev);
1226                 goto fail; /* leave path added to pathvec */
1227         }
1228         mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
1229         if (mpp && pp->size && mpp->size != pp->size) {
1230                 condlog(0, "%s: failed to add new path %s, device size mismatch", mpp->alias, pp->dev);
1231                 int i = find_slot(vecs->pathvec, (void *)pp);
1232                 if (i != -1)
1233                         vector_del_slot(vecs->pathvec, i);
1234                 free_path(pp);
1235                 return 1;
1236         }
1237         if (mpp)
1238                 trigger_path_udev_change(pp, true);
1239         if (mpp && mpp->wait_for_udev &&
1240             (pathcount(mpp, PATH_UP) > 0 ||
1241              (pathcount(mpp, PATH_GHOST) > 0 &&
1242               path_get_tpgs(pp) != TPGS_IMPLICIT &&
1243               mpp->ghost_delay_tick <= 0))) {
1244                 /* if wait_for_udev is set and valid paths exist */
1245                 condlog(3, "%s: delaying path addition until %s is fully initialized",
1246                         pp->dev, mpp->alias);
1247                 mpp->wait_for_udev = 2;
1248                 orphan_path(pp, "waiting for create to complete");
1249                 return 0;
1250         }
1251
1252         pp->mpp = mpp;
1253 rescan:
1254         if (mpp) {
1255                 condlog(4,"%s: adopting all paths for path %s",
1256                         mpp->alias, pp->dev);
1257                 if (adopt_paths(vecs->pathvec, mpp) || pp->mpp != mpp ||
1258                     find_slot(mpp->paths, pp) == -1)
1259                         goto fail; /* leave path added to pathvec */
1260
1261                 verify_paths(mpp);
1262                 mpp->action = ACT_RELOAD;
1263                 prflag = mpp->prflag;
1264                 mpath_pr_event_handle(pp);
1265         } else {
1266                 if (!should_multipath(pp, vecs->pathvec, vecs->mpvec)) {
1267                         orphan_path(pp, "only one path");
1268                         return 0;
1269                 }
1270                 condlog(4,"%s: creating new map", pp->dev);
1271                 if ((mpp = add_map_with_path(vecs, pp, 1))) {
1272                         mpp->action = ACT_CREATE;
1273                         /*
1274                          * We don't depend on ACT_CREATE, as domap will
1275                          * set it to ACT_NOTHING when complete.
1276                          */
1277                         start_waiter = 1;
1278                 }
1279                 else
1280                         goto fail; /* leave path added to pathvec */
1281         }
1282
1283         /* ro check - if new path is ro, force map to be ro as well */
1284         ro = sysfs_get_ro(pp);
1285         if (ro == 1)
1286                 mpp->force_readonly = 1;
1287
1288         if (!need_do_map)
1289                 return 0;
1290
1291         if (!dm_map_present(mpp->alias)) {
1292                 mpp->action = ACT_CREATE;
1293                 start_waiter = 1;
1294         }
1295         /*
1296          * push the map to the device-mapper
1297          */
1298         if (setup_map(mpp, &params, vecs)) {
1299                 condlog(0, "%s: failed to setup map for addition of new "
1300                         "path %s", mpp->alias, pp->dev);
1301                 goto fail_map;
1302         }
1303         /*
1304          * reload the map for the multipath mapped device
1305          */
1306         ret = domap(mpp, params, 1);
1307         while (ret == DOMAP_RETRY && retries-- > 0) {
1308                 condlog(0, "%s: retry domap for addition of new "
1309                         "path %s", mpp->alias, pp->dev);
1310                 sleep(1);
1311                 ret = domap(mpp, params, 1);
1312         }
1313         if (ret == DOMAP_FAIL || ret == DOMAP_RETRY) {
1314                 condlog(0, "%s: failed in domap for addition of new "
1315                         "path %s", mpp->alias, pp->dev);
1316                 /*
1317                  * deal with asynchronous uevents :((
1318                  */
1319                 if (mpp->action == ACT_RELOAD && retries-- > 0) {
1320                         condlog(0, "%s: ev_add_path sleep", mpp->alias);
1321                         sleep(1);
1322                         update_mpp_paths(mpp, vecs->pathvec);
1323                         free(params);
1324                         params = NULL;
1325                         goto rescan;
1326                 }
1327                 else if (mpp->action == ACT_RELOAD)
1328                         condlog(0, "%s: giving up reload", mpp->alias);
1329                 else
1330                         goto fail_map;
1331         }
1332
1333         if ((mpp->action == ACT_CREATE ||
1334              (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
1335             wait_for_events(mpp, vecs))
1336                         goto fail_map;
1337
1338         /*
1339          * update our state from kernel regardless of create or reload
1340          */
1341         if (setup_multipath(vecs, mpp))
1342                 goto fail; /* if setup_multipath fails, it removes the map */
1343
1344         sync_map_state(mpp);
1345
1346         if (retries >= 0) {
1347                 if (start_waiter)
1348                         update_map_pr(mpp);
1349                 if (mpp->prflag == PRFLAG_SET && prflag != PRFLAG_SET)
1350                                 pr_register_active_paths(mpp);
1351                 condlog(2, "%s [%s]: path added to devmap %s",
1352                         pp->dev, pp->dev_t, mpp->alias);
1353                 return 0;
1354         } else
1355                 goto fail;
1356
1357 fail_map:
1358         remove_map(mpp, vecs->pathvec, vecs->mpvec);
1359 fail:
1360         orphan_path(pp, "failed to add path");
1361         return 1;
1362 }
1363
1364 static int
1365 uev_remove_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
1366 {
1367         struct path *pp;
1368
1369         condlog(3, "%s: remove path (uevent)", uev->kernel);
1370         delete_foreign(uev->udev);
1371
1372         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1373         lock(&vecs->lock);
1374         pthread_testcancel();
1375         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
1376         if (pp)
1377                 ev_remove_path(pp, vecs, need_do_map);
1378         lock_cleanup_pop(vecs->lock);
1379         if (!pp) /* Not an error; path might have been purged earlier */
1380                 condlog(0, "%s: path already removed", uev->kernel);
1381         return 0;
1382 }
1383
/*
 * Remove a path from its map, reloading the map without it, or remove
 * the map entirely if this was its last path.
 * Returns a REMOVE_PATH_* code. On REMOVE_PATH_SUCCESS the path has
 * been freed (by setup_multipath(), flush_map_nopaths(), or directly
 * for an orphan); on REMOVE_PATH_DELAY/FAILURE it stays in pathvec in
 * INIT_REMOVED state.
 */
int
ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
{
	struct multipath * mpp;
	int i, retval = REMOVE_PATH_SUCCESS;
	char *params __attribute__((cleanup(cleanup_charp))) = NULL;

	/*
	 * avoid referring to the map of an orphaned path
	 */
	if ((mpp = pp->mpp)) {
		/*
		 * Mark the path as removed. In case of success, we
		 * will delete it for good. Otherwise, it will be deleted
		 * later, unless all attempts to reload this map fail.
		 */
		set_path_removed(pp);

		/*
		 * transform the mp->pg vector of vectors of paths
		 * into a mp->params string to feed the device-mapper
		 */
		if (update_mpp_paths(mpp, vecs->pathvec)) {
			condlog(0, "%s: failed to update paths",
				mpp->alias);
			goto fail;
		}

		/*
		 * we have to explicitly remove pp from mpp->paths,
		 * update_mpp_paths() doesn't do that.
		 */
		i = find_slot(mpp->paths, pp);
		if (i != -1)
			vector_del_slot(mpp->paths, i);

		/*
		 * remove the map IF removing the last path. If
		 * flush_map_nopaths succeeds, the path has been removed.
		 */
		if (VECTOR_SIZE(mpp->paths) == 0 &&
		    flush_map_nopaths(mpp, vecs))
			goto out;

		if (setup_map(mpp, &params, vecs)) {
			condlog(0, "%s: failed to setup map for"
				" removal of path %s", mpp->alias, pp->dev);
			goto fail;
		}

		/* defer the reload while the map still waits for udev */
		if (mpp->wait_for_udev) {
			mpp->wait_for_udev = 2;
			retval = REMOVE_PATH_DELAY;
			goto out;
		}

		/* caller asked us not to push the reload to the kernel now */
		if (!need_do_map) {
			retval = REMOVE_PATH_DELAY;
			goto out;
		}
		/*
		 * reload the map
		 */
		mpp->action = ACT_RELOAD;
		if (domap(mpp, params, 1) == DOMAP_FAIL) {
			condlog(0, "%s: failed in domap for "
				"removal of path %s",
				mpp->alias, pp->dev);
			retval = REMOVE_PATH_FAILURE;
		} else {
			/*
			 * update our state from kernel
			 */
			char devt[BLK_DEV_SIZE];

			/* keep a copy for logging: pp is freed below */
			strlcpy(devt, pp->dev_t, sizeof(devt));

			/* setup_multipath will free the path
			 * regardless of whether it succeeds or
			 * fails */
			if (setup_multipath(vecs, mpp))
				return REMOVE_PATH_MAP_ERROR;
			sync_map_state(mpp);

			condlog(2, "%s: path removed from map %s",
				devt, mpp->alias);
		}
	} else {
		/* mpp == NULL: orphan path, just drop it from pathvec */
		if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
			vector_del_slot(vecs->pathvec, i);
		free_path(pp);
	}
out:
	return retval;

fail:
	condlog(0, "%s: error removing path. removing map %s", pp->dev,
		mpp->alias);
	remove_map_and_stop_waiter(mpp, vecs);
	return REMOVE_PATH_MAP_ERROR;
}
1486
1487 int
1488 finish_path_init(struct path *pp, struct vectors * vecs)
1489 {
1490         int r;
1491         struct config *conf;
1492
1493         if (pp->udev && pp->uid_attribute && *pp->uid_attribute &&
1494             !udev_device_get_is_initialized(pp->udev))
1495                 return 0;
1496         conf = get_multipath_config();
1497         pthread_cleanup_push(put_multipath_config, conf);
1498         r = pathinfo(pp, conf, DI_ALL|DI_BLACKLIST);
1499         pthread_cleanup_pop(1);
1500
1501         if (r == PATHINFO_OK)
1502                 return 0;
1503
1504         condlog(0, "%s: error fully initializing path, removing", pp->dev);
1505         ev_remove_path(pp, vecs, 1);
1506         return -1;
1507 }
1508
1509 static bool
1510 needs_ro_update(struct multipath *mpp, int ro)
1511 {
1512         struct pathgroup * pgp;
1513         struct path * pp;
1514         unsigned int i, j;
1515
1516         if (!mpp || ro < 0)
1517                 return false;
1518         if (!has_dm_info(mpp))
1519                 return true;
1520         if (mpp->dmi.read_only == ro)
1521                 return false;
1522         if (ro == 1)
1523                 return true;
1524         vector_foreach_slot (mpp->pg, pgp, i) {
1525                 vector_foreach_slot (pgp->paths, pp, j) {
1526                         if (sysfs_get_ro(pp) == 1)
1527                                 return false;
1528                 }
1529         }
1530         return true;
1531 }
1532
1533 int resize_map(struct multipath *mpp, unsigned long long size,
1534                struct vectors * vecs)
1535 {
1536         char *params __attribute__((cleanup(cleanup_charp))) = NULL;
1537         unsigned long long orig_size = mpp->size;
1538
1539         mpp->size = size;
1540         update_mpp_paths(mpp, vecs->pathvec);
1541         if (setup_map(mpp, &params, vecs) != 0) {
1542                 condlog(0, "%s: failed to setup map for resize : %s",
1543                         mpp->alias, strerror(errno));
1544                 mpp->size = orig_size;
1545                 return 1;
1546         }
1547         mpp->action = ACT_RESIZE;
1548         mpp->force_udev_reload = 1;
1549         if (domap(mpp, params, 1) == DOMAP_FAIL) {
1550                 condlog(0, "%s: failed to resize map : %s", mpp->alias,
1551                         strerror(errno));
1552                 mpp->size = orig_size;
1553                 return 1;
1554         }
1555         if (setup_multipath(vecs, mpp) != 0)
1556                 return 2;
1557         sync_map_state(mpp);
1558
1559         return 0;
1560 }
1561
1562 static int
1563 uev_update_path (struct uevent *uev, struct vectors * vecs)
1564 {
1565         int ro, retval = 0, rc;
1566         struct path * pp;
1567         struct config *conf;
1568         int needs_reinit = 0;
1569
1570         switch ((rc = change_foreign(uev->udev))) {
1571         case FOREIGN_OK:
1572                 /* known foreign path, ignore event */
1573                 return 0;
1574         case FOREIGN_IGNORED:
1575                 break;
1576         case FOREIGN_ERR:
1577                 condlog(3, "%s: error in change_foreign", __func__);
1578                 break;
1579         default:
1580                 condlog(1, "%s: return code %d of change_foreign is unsupported",
1581                         __func__, rc);
1582                 break;
1583         }
1584
1585         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1586         lock(&vecs->lock);
1587         pthread_testcancel();
1588
1589         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
1590         if (pp) {
1591                 struct multipath *mpp = pp->mpp;
1592                 char wwid[WWID_SIZE];
1593                 int auto_resize;
1594
1595                 conf = get_multipath_config();
1596                 auto_resize = conf->auto_resize;
1597                 put_multipath_config(conf);
1598
1599                 if (pp->initialized == INIT_REQUESTED_UDEV) {
1600                         needs_reinit = 1;
1601                         goto out;
1602                 }
1603                 /* Don't deal with other types of failed initialization
1604                  * now. check_path will handle it */
1605                 if (!strlen(pp->wwid) && pp->initialized != INIT_PARTIAL)
1606                         goto out;
1607
1608                 strcpy(wwid, pp->wwid);
1609                 rc = get_uid(pp, pp->state, uev->udev, 0);
1610
1611                 if (rc != 0)
1612                         strcpy(pp->wwid, wwid);
1613                 else if (strlen(wwid) &&
1614                          strncmp(wwid, pp->wwid, WWID_SIZE) != 0) {
1615                         condlog(0, "%s: path wwid changed from '%s' to '%s'",
1616                                 uev->kernel, wwid, pp->wwid);
1617                         ev_remove_path(pp, vecs, 1);
1618                         needs_reinit = 1;
1619                         goto out;
1620                 } else if (pp->initialized == INIT_PARTIAL) {
1621                         udev_device_unref(pp->udev);
1622                         pp->udev = udev_device_ref(uev->udev);
1623                         if (finish_path_init(pp, vecs) < 0) {
1624                                 retval = 1;
1625                                 goto out;
1626                         }
1627                 } else {
1628                         udev_device_unref(pp->udev);
1629                         pp->udev = udev_device_ref(uev->udev);
1630                         conf = get_multipath_config();
1631                         pthread_cleanup_push(put_multipath_config, conf);
1632                         if (pathinfo(pp, conf, DI_SYSFS|DI_NOIO) != PATHINFO_OK)
1633                                 condlog(1, "%s: pathinfo failed after change uevent",
1634                                         uev->kernel);
1635                         pthread_cleanup_pop(1);
1636                 }
1637
1638                 ro = uevent_get_disk_ro(uev);
1639                 if (needs_ro_update(mpp, ro)) {
1640                         condlog(2, "%s: update path write_protect to '%d' (uevent)", uev->kernel, ro);
1641
1642                         if (mpp->wait_for_udev)
1643                                 mpp->wait_for_udev = 2;
1644                         else {
1645                                 if (ro == 1)
1646                                         pp->mpp->force_readonly = 1;
1647                                 retval = reload_and_sync_map(mpp, vecs);
1648                                 if (retval == 2)
1649                                         condlog(2, "%s: map removed during reload", pp->dev);
1650                                 else {
1651                                         pp->mpp->force_readonly = 0;
1652                                         condlog(2, "%s: map %s reloaded (retval %d)", uev->kernel, mpp->alias, retval);
1653                                 }
1654                         }
1655                 }
1656                 if (auto_resize != AUTO_RESIZE_NEVER &&
1657                     !mpp->wait_for_udev) {
1658                         struct pathgroup *pgp;
1659                         struct path *pp2;
1660                         unsigned int i, j;
1661                         unsigned long long orig_size = mpp->size;
1662
1663                         if (!pp->size || pp->size == mpp->size ||
1664                             (pp->size < mpp->size &&
1665                              auto_resize == AUTO_RESIZE_GROW_ONLY))
1666                                 goto out;
1667
1668                         vector_foreach_slot(mpp->pg, pgp, i)
1669                                 vector_foreach_slot (pgp->paths, pp2, j)
1670                                         if (pp2->size && pp2->size != pp->size)
1671                                                 goto out;
1672                         retval = resize_map(mpp, pp->size, vecs);
1673                         if (retval == 2)
1674                                 condlog(2, "%s: map removed during resize", pp->dev);
1675                         else if (retval == 0)
1676                                 condlog(2, "%s: resized map from %llu to %llu",
1677                                         mpp->alias, orig_size, pp->size);
1678                 }
1679         }
1680 out:
1681         lock_cleanup_pop(vecs->lock);
1682         if (!pp) {
1683                 /* If the path is blacklisted, print a debug/non-default verbosity message. */
1684                 if (uev->udev) {
1685                         int flag = DI_SYSFS | DI_WWID;
1686
1687                         conf = get_multipath_config();
1688                         pthread_cleanup_push(put_multipath_config, conf);
1689                         retval = alloc_path_with_pathinfo(conf, uev->udev, uev->wwid, flag, NULL);
1690                         pthread_cleanup_pop(1);
1691
1692                         if (retval == PATHINFO_SKIPPED) {
1693                                 condlog(3, "%s: spurious uevent, path is blacklisted", uev->kernel);
1694                                 return 0;
1695                         }
1696                 }
1697
1698                 condlog(0, "%s: spurious uevent, path not found", uev->kernel);
1699         }
1700         /* pp->initialized must not be INIT_PARTIAL if needs_reinit is set */
1701         if (needs_reinit)
1702                 retval = uev_add_path(uev, vecs, 1);
1703         return retval;
1704 }
1705
/*
 * Handle a dm PATH_FAILED uevent: look up the failed path and feed it
 * into the io_err_stat accounting machinery (marginal path detection).
 * Returns 0 only when io_err_stat_handle_pathfail() accepted the path;
 * 1 on any other outcome (not a PATH_FAILED event, path unknown, ...).
 */
static int
uev_pathfail_check(struct uevent *uev, struct vectors *vecs)
{
	char *action = NULL, *devt = NULL;
	struct path *pp;
	int r = 1;

	action = uevent_get_dm_action(uev);
	if (!action)
		return 1;
	if (strncmp(action, "PATH_FAILED", 11))
		goto out;
	devt = uevent_get_dm_path(uev);
	if (!devt) {
		condlog(3, "%s: No DM_PATH in uevent", uev->kernel);
		goto out;
	}

	/* path lookup and accounting must happen under the vecs lock */
	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();
	pp = find_path_by_devt(vecs->pathvec, devt);
	if (!pp)
		goto out_lock;
	r = io_err_stat_handle_pathfail(pp);
	if (r)
		condlog(3, "io_err_stat: %s: cannot handle pathfail uevent",
				pp->dev);
out_lock:
	lock_cleanup_pop(vecs->lock);
	free(devt);
	free(action);
	return r;
out:
	/* early exits: devt is either NULL or not yet allocated here */
	free(action);
	return 1;
}
1743
1744 static int
1745 map_discovery (struct vectors * vecs)
1746 {
1747         struct multipath * mpp;
1748         unsigned int i;
1749
1750         if (dm_get_maps(vecs->mpvec))
1751                 return 1;
1752
1753         vector_foreach_slot (vecs->mpvec, mpp, i)
1754                 if (update_multipath_table(mpp, vecs->pathvec, 0) != DMP_OK) {
1755                         remove_map(mpp, vecs->pathvec, vecs->mpvec);
1756                         i--;
1757                 }
1758
1759         return 0;
1760 }
1761
/*
 * Uevent dispatcher callback, invoked with each (possibly merged)
 * uevent.  trigger_data is the global struct vectors *.  "dm-*"
 * kernel device events are handled as map events; everything else
 * is treated as a path event.  Returns the accumulated result of
 * the individual handlers (0 on full success).
 */
int
uev_trigger (struct uevent * uev, void * trigger_data)
{
	int r = 0;
	struct vectors * vecs;
	struct uevent *merge_uev, *tmp;
	enum daemon_status state;

	vecs = (struct vectors *)trigger_data;

	/*
	 * Block until the daemon reaches a state in which uevents may
	 * be processed (idle/running), or is shutting down.
	 */
	pthread_cleanup_push(config_cleanup, NULL);
	pthread_mutex_lock(&config_lock);
	while (running_state != DAEMON_IDLE &&
	       running_state != DAEMON_RUNNING &&
	       running_state != DAEMON_SHUTDOWN)
		pthread_cond_wait(&config_cond, &config_lock);
	state = running_state;
	pthread_cleanup_pop(1);

	if (state == DAEMON_SHUTDOWN)
		return 0;

	/*
	 * device map event
	 * Add events are ignored here as the tables
	 * are not fully initialised then.
	 */
	if (!strncmp(uev->kernel, "dm-", 3)) {
		/* non-multipath dm devices are delegated to foreign handling */
		if (!uevent_is_mpath(uev)) {
			if (!strncmp(uev->action, "change", 6))
				(void)add_foreign(uev->udev);
			else if (!strncmp(uev->action, "remove", 6))
				(void)delete_foreign(uev->udev);
			goto out;
		}
		if (!strncmp(uev->action, "change", 6)) {
			r = uev_add_map(uev, vecs);

			/*
			 * the kernel-side dm-mpath issues a PATH_FAILED event
			 * when it encounters a path IO error. It is reason-
			 * able be the entry of path IO error accounting pro-
			 * cess.
			 */
			uev_pathfail_check(uev, vecs);
		} else if (!strncmp(uev->action, "remove", 6)) {
			r = uev_remove_map(uev, vecs);
		}
		goto out;
	}

	/*
	 * path add/remove/change event, add/remove maybe merged
	 */
	list_for_each_entry_safe(merge_uev, tmp, &uev->merge_node, node) {
		if (!strncmp(merge_uev->action, "add", 3))
			r += uev_add_path(merge_uev, vecs, 0);
		if (!strncmp(merge_uev->action, "remove", 6))
			r += uev_remove_path(merge_uev, vecs, 0);
	}

	/* the final (unmerged) event triggers map reconfiguration */
	if (!strncmp(uev->action, "add", 3))
		r += uev_add_path(uev, vecs, 1);
	if (!strncmp(uev->action, "remove", 6))
		r += uev_remove_path(uev, vecs, 1);
	if (!strncmp(uev->action, "change", 6))
		r += uev_update_path(uev, vecs);

out:
	return r;
}
1833
/* pthread cleanup handler: drop this thread's liburcu registration */
static void rcu_unregister(__attribute__((unused)) void *param)
{
	rcu_unregister_thread();
}
1838
1839 static void *
1840 ueventloop (void * ap)
1841 {
1842         struct udev *udev = ap;
1843
1844         pthread_cleanup_push(rcu_unregister, NULL);
1845         rcu_register_thread();
1846         if (uevent_listen(udev))
1847                 condlog(0, "error starting uevent listener");
1848         pthread_cleanup_pop(1);
1849         return NULL;
1850 }
1851
/*
 * Thread entry point: drain queued uevents, dispatching each to
 * uev_trigger().  "ap" is passed through as trigger_data (the
 * struct vectors *, see uev_trigger's cast).
 */
static void *
uevqloop (void * ap)
{
	pthread_cleanup_push(rcu_unregister, NULL);
	rcu_register_thread();
	if (uevent_dispatch(&uev_trigger, ap))
		condlog(0, "error starting uevent dispatcher");
	pthread_cleanup_pop(1);
	return NULL;
}
/*
 * Thread entry point: create and service the multipathd command
 * socket (DEFAULT_SOCKET).  On fatal setup errors the whole daemon
 * is shut down via exit_daemon().
 */
static void *
uxlsnrloop (void * ap)
{
	/* "long" so the fd fits through the void * cleanup argument below */
	long ux_sock;

	pthread_cleanup_push(rcu_unregister, NULL);
	rcu_register_thread();

	ux_sock = ux_socket_listen(DEFAULT_SOCKET);
	if (ux_sock == -1) {
		condlog(1, "could not create uxsock: %d", errno);
		exit_daemon();
		goto out;
	}
	pthread_cleanup_push(uxsock_cleanup, (void *)ux_sock);

	if (cli_init()) {
		condlog(1, "Failed to init uxsock listener");
		exit_daemon();
		goto out_sock;
	}

	/* Tell main thread that thread has started */
	post_config_state(DAEMON_CONFIGURE);

	umask(077);

	/*
	 * Wait for initial reconfiguration to finish, while
	 * handling signals
	 */
	while (wait_for_state_change_if(DAEMON_CONFIGURE, 50)
	       == DAEMON_CONFIGURE)
		handle_signals(false);

	uxsock_listen(ux_sock, ap);

	/* note: out_sock/out must pop the cleanup handlers in LIFO order */
out_sock:
	pthread_cleanup_pop(1); /* uxsock_cleanup */
out:
	pthread_cleanup_pop(1); /* rcu_unregister */
	return NULL;
}
1905
/* Request daemon shutdown by posting the DAEMON_SHUTDOWN state. */
void
exit_daemon (void)
{
	post_config_state(DAEMON_SHUTDOWN);
}
1911
1912 static void
1913 fail_path (struct path * pp, int del_active)
1914 {
1915         if (!pp->mpp)
1916                 return;
1917
1918         condlog(2, "checker failed path %s in map %s",
1919                  pp->dev_t, pp->mpp->alias);
1920
1921         dm_fail_path(pp->mpp->alias, pp->dev_t);
1922         if (del_active)
1923                 update_queue_mode_del_path(pp->mpp);
1924 }
1925
1926 /*
1927  * caller must have locked the path list before calling that function
1928  */
1929 static void
1930 reinstate_path (struct path * pp)
1931 {
1932         if (!pp->mpp)
1933                 return;
1934
1935         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
1936                 condlog(0, "%s: reinstate failed", pp->dev_t);
1937         else {
1938                 condlog(2, "%s: reinstated", pp->dev_t);
1939                 update_queue_mode_add_path(pp->mpp);
1940         }
1941 }
1942
1943 static void
1944 enable_group(struct path * pp)
1945 {
1946         struct pathgroup * pgp;
1947
1948         /*
1949          * if path is added through uev_add_path, pgindex can be unset.
1950          * next update_strings() will set it, upon map reload event.
1951          *
1952          * we can safely return here, because upon map reload, all
1953          * PG will be enabled.
1954          */
1955         if (!pp->mpp->pg || !pp->pgindex)
1956                 return;
1957
1958         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1959
1960         if (pgp->status == PGSTATE_DISABLED) {
1961                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1962                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1963         }
1964 }
1965
1966 static void
1967 mpvec_garbage_collector (struct vectors * vecs)
1968 {
1969         struct multipath * mpp;
1970         unsigned int i;
1971
1972         if (!vecs->mpvec)
1973                 return;
1974
1975         vector_foreach_slot (vecs->mpvec, mpp, i) {
1976                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1977                         condlog(2, "%s: remove dead map", mpp->alias);
1978                         remove_map_and_stop_waiter(mpp, vecs);
1979                         i--;
1980                 }
1981         }
1982 }
1983
1984 /* This is called after a path has started working again. It the multipath
1985  * device for this path uses the followover failback type, and this is the
1986  * best pathgroup, and this is the first path in the pathgroup to come back
1987  * up, then switch to this pathgroup */
1988 static int
1989 followover_should_failback(struct path * pp)
1990 {
1991         struct pathgroup * pgp;
1992         struct path *pp1;
1993         int i;
1994
1995         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1996             !pp->mpp->pg || !pp->pgindex ||
1997             pp->pgindex != pp->mpp->bestpg)
1998                 return 0;
1999
2000         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
2001         vector_foreach_slot(pgp->paths, pp1, i) {
2002                 if (pp1 == pp)
2003                         continue;
2004                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
2005                         return 0;
2006         }
2007         return 1;
2008 }
2009
/*
 * Per-tick countdown for maps waiting on their creation uevent.
 * On timeout, stop blocking reloads for the map; wait_for_udev > 1
 * indicates a map update was deferred while waiting, so push one
 * out now.  If any map timed out, unblock a pending reconfigure
 * (unless something else still requires delaying it).
 */
static void
missing_uev_wait_tick(struct vectors *vecs)
{
	struct multipath * mpp;
	unsigned int i;
	int timed_out = 0;

	vector_foreach_slot (vecs->mpvec, mpp, i) {
		if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
			timed_out = 1;
			condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
			if (mpp->wait_for_udev > 1 &&
			    update_map(mpp, vecs, 0)) {
				/* update_map removed map */
				i--;
				continue;
			}
			mpp->wait_for_udev = 0;
		}
	}

	if (timed_out && !need_to_delay_reconfig(vecs))
		unblock_reconfigure();
}
2034
/*
 * Per-tick countdown for maps still waiting for an active
 * (non-ghost) path.  On expiry, force a udev reload of the map.
 */
static void
ghost_delay_tick(struct vectors *vecs)
{
	struct multipath * mpp;
	unsigned int i;

	vector_foreach_slot (vecs->mpvec, mpp, i) {
		/* <= 0 means the countdown is not armed for this map */
		if (mpp->ghost_delay_tick <= 0)
			continue;
		if (--mpp->ghost_delay_tick <= 0) {
			condlog(0, "%s: timed out waiting for active path",
				mpp->alias);
			mpp->force_udev_reload = 1;
			if (update_map(mpp, vecs, 0) != 0) {
				/* update_map removed map */
				i--;
				continue;
			}
		}
	}
}
2056
2057 static void
2058 deferred_failback_tick (struct vectors *vecs)
2059 {
2060         struct multipath * mpp;
2061         unsigned int i;
2062         bool need_reload;
2063
2064         vector_foreach_slot (vecs->mpvec, mpp, i) {
2065                 /*
2066                  * deferred failback getting sooner
2067                  */
2068                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
2069                         mpp->failback_tick--;
2070
2071                         if (!mpp->failback_tick &&
2072                             need_switch_pathgroup(mpp, &need_reload)) {
2073                                 if (need_reload)
2074                                         reload_and_sync_map(mpp, vecs);
2075                                 else
2076                                         switch_pathgroup(mpp);
2077                         }
2078                 }
2079         }
2080 }
2081
2082 static void
2083 retry_count_tick(vector mpvec)
2084 {
2085         struct multipath *mpp;
2086         unsigned int i;
2087
2088         vector_foreach_slot (mpvec, mpp, i) {
2089                 if (mpp->retry_tick > 0) {
2090                         mpp->stat_total_queueing_time++;
2091                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
2092                         if(--mpp->retry_tick == 0) {
2093                                 mpp->stat_map_failures++;
2094                                 dm_queue_if_no_path(mpp->alias, 0);
2095                                 condlog(2, "%s: Disable queueing", mpp->alias);
2096                         }
2097                 }
2098         }
2099 }
2100
/*
 * Countdown for paths stuck in INIT_PARTIAL: when the per-path
 * retrigger delay expires, write to the device's sysfs "uevent"
 * attribute to synthesize a "change" event (or "add", if udev has
 * not yet initialized the device), so the path gets reprocessed.
 */
static void
partial_retrigger_tick(vector pathvec)
{
	struct path *pp;
	unsigned int i;

	vector_foreach_slot (pathvec, pp, i) {
		if (pp->initialized == INIT_PARTIAL && pp->udev &&
		    pp->partial_retrigger_delay > 0 &&
		    --pp->partial_retrigger_delay == 0) {
			const char *msg = udev_device_get_is_initialized(pp->udev) ?
					  "change" : "add";
			ssize_t len = strlen(msg);
			ssize_t ret = sysfs_attr_set_value(pp->udev, "uevent", msg,
							   len);

			/* a short write means the trigger didn't go through */
			if (len != ret)
				log_sysfs_attr_set_value(2, ret,
					"%s: failed to trigger %s event",
					pp->dev, msg);
		}
	}
}
2124
/*
 * Refresh pp's priority (unless the path is down) and, if it changed
 * or refresh_all is set, refresh the priorities of all other usable
 * paths in the same map.
 *
 * Returns 1 if any of the *other* paths changed priority, 0 otherwise.
 * NOTE(review): a change of pp's own priority alone yields return 0
 * (changed only tracks the sibling paths) — confirm callers re-check
 * pp->priority themselves.
 */
int update_prio(struct path *pp, int refresh_all)
{
	int oldpriority;
	struct path *pp1;
	struct pathgroup * pgp;
	int i, j, changed = 0;
	struct config *conf;

	oldpriority = pp->priority;
	if (pp->state != PATH_DOWN) {
		conf = get_multipath_config();
		pthread_cleanup_push(put_multipath_config, conf);
		pathinfo(pp, conf, DI_PRIO);
		pthread_cleanup_pop(1);
	}

	if (pp->priority == oldpriority && !refresh_all)
		return 0;

	/* refresh the priorities of the other (non-down) paths */
	vector_foreach_slot (pp->mpp->pg, pgp, i) {
		vector_foreach_slot (pgp->paths, pp1, j) {
			if (pp1 == pp || pp1->state == PATH_DOWN)
				continue;
			oldpriority = pp1->priority;
			conf = get_multipath_config();
			pthread_cleanup_push(put_multipath_config, conf);
			pathinfo(pp1, conf, DI_PRIO);
			pthread_cleanup_pop(1);
			if (pp1->priority != oldpriority)
				changed = 1;
		}
	}
	return changed;
}
2159
2160 static int reload_map(struct vectors *vecs, struct multipath *mpp,
2161                       int is_daemon)
2162 {
2163         char *params __attribute__((cleanup(cleanup_charp))) = NULL;
2164         int r;
2165
2166         update_mpp_paths(mpp, vecs->pathvec);
2167         if (setup_map(mpp, &params, vecs)) {
2168                 condlog(0, "%s: failed to setup map", mpp->alias);
2169                 return 1;
2170         }
2171         select_action(mpp, vecs->mpvec, 1);
2172
2173         r = domap(mpp, params, is_daemon);
2174         if (r == DOMAP_FAIL || r == DOMAP_RETRY) {
2175                 condlog(3, "%s: domap (%u) failure "
2176                         "for reload map", mpp->alias, r);
2177                 return 1;
2178         }
2179
2180         return 0;
2181 }
2182
/*
 * Reload mpp's kernel table and resynchronize daemon state with it.
 * Returns 0 on success, 1 if the reload failed, 2 if the map went
 * away while re-reading it (setup_multipath failure — callers log
 * "map removed during reload" for this case).
 */
int reload_and_sync_map(struct multipath *mpp, struct vectors *vecs)
{
	if (reload_map(vecs, mpp, 1) != 0)
		return 1;
	if (setup_multipath(vecs, mpp) != 0)
		return 2;

	sync_map_state(mpp);
	return 0;
}
2193
/*
 * san_path_err_* flap detection.  Counts "bad"->"good" path state
 * transitions and, once path_failures exceeds san_path_err_threshold,
 * holds the path in delayed state for san_path_err_recovery_time.
 *
 * Returns 1 if the path must NOT be reinstated yet (it is being
 * delayed), 0 if reinstatement may proceed.
 */
static int check_path_reinstate_state(struct path * pp) {
	struct timespec curr_time;

	/*
	 * This function is only called when the path state changes
	 * from "bad" to "good". pp->state reflects the *previous* state.
	 * If this was "bad", we know that a failure must have occurred
	 * beforehand, and count that.
	 * Note that we count path state _changes_ this way. If a path
	 * remains in "bad" state, failure count is not increased.
	 */

	/* feature is only active if all three tunables are configured */
	if (!((pp->mpp->san_path_err_threshold > 0) &&
				(pp->mpp->san_path_err_forget_rate > 0) &&
				(pp->mpp->san_path_err_recovery_time >0))) {
		return 0;
	}

	if (pp->disable_reinstate) {
		/* If there are no other usable paths, reinstate the path */
		if (count_active_paths(pp->mpp) == 0) {
			condlog(2, "%s : reinstating path early", pp->dev);
			goto reinstate_path;
		}
		get_monotonic_time(&curr_time);

		/* If path became failed again or continue failed, should reset
		 * path san_path_err_forget_rate and path dis_reinstate_time to
		 * start a new stable check.
		 */
		if ((pp->state != PATH_UP) && (pp->state != PATH_GHOST) &&
			(pp->state != PATH_DELAYED)) {
			pp->san_path_err_forget_rate =
				pp->mpp->san_path_err_forget_rate;
			pp->dis_reinstate_time = curr_time.tv_sec;
		}

		if ((curr_time.tv_sec - pp->dis_reinstate_time ) > pp->mpp->san_path_err_recovery_time) {
			condlog(2,"%s : reinstate the path after err recovery time", pp->dev);
			goto reinstate_path;
		}
		/* still inside the recovery window: keep delaying */
		return 1;
	}
	/* forget errors on a working path */
	if ((pp->state == PATH_UP || pp->state == PATH_GHOST) &&
			pp->path_failures > 0) {
		if (pp->san_path_err_forget_rate > 0){
			pp->san_path_err_forget_rate--;
		} else {
			/* for every san_path_err_forget_rate number of
			 * successful path checks decrement path_failures by 1
			 */
			pp->path_failures--;
			pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
		}
		return 0;
	}

	/* If the path isn't recovering from a failed state, do nothing */
	if (pp->state != PATH_DOWN && pp->state != PATH_SHAKY &&
			pp->state != PATH_TIMEOUT)
		return 0;

	if (pp->path_failures == 0)
		pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;

	pp->path_failures++;

	/* if we don't know the currently time, we don't know how long to
	 * delay the path, so there's no point in checking if we should
	 */

	get_monotonic_time(&curr_time);
	/* when path failures has exceeded the san_path_err_threshold
	 * place the path in delayed state till san_path_err_recovery_time
	 * so that the customer can rectify the issue within this time. After
	 * the completion of san_path_err_recovery_time it should
	 * automatically reinstate the path
	 * (note: we know that san_path_err_threshold > 0 here).
	 */
	if (pp->path_failures > (unsigned int)pp->mpp->san_path_err_threshold) {
		condlog(2, "%s : hit error threshold. Delaying path reinstatement", pp->dev);
		pp->dis_reinstate_time = curr_time.tv_sec;
		pp->disable_reinstate = 1;

		return 1;
	} else {
		return 0;
	}

reinstate_path:
	/* reset all flap-tracking state and allow reinstatement */
	pp->path_failures = 0;
	pp->disable_reinstate = 0;
	pp->san_path_err_forget_rate = 0;
	return 0;
}
2290
2291 static int
2292 should_skip_path(struct path *pp){
2293         if (marginal_path_check_enabled(pp->mpp)) {
2294                 if (pp->io_err_disable_reinstate && need_io_err_check(pp))
2295                         return 1;
2296         } else if (san_path_check_enabled(pp->mpp)) {
2297                 if (check_path_reinstate_state(pp))
2298                         return 1;
2299         }
2300         return 0;
2301 }
2302
2303 /*
2304  * Returns '1' if the path has been checked, '-1' if it was blacklisted
2305  * and '0' otherwise
2306  */
2307 int
2308 check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
2309 {
2310         int newstate;
2311         int new_path_up = 0;
2312         int chkr_new_path_up = 0;
2313         int disable_reinstate = 0;
2314         int oldchkrstate = pp->chkrstate;
2315         int retrigger_tries;
2316         unsigned int checkint, max_checkint;
2317         struct config *conf;
2318         int marginal_pathgroups, marginal_changed = 0;
2319         int ret;
2320         bool need_reload;
2321
2322         if (((pp->initialized == INIT_OK || pp->initialized == INIT_PARTIAL ||
2323               pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp) ||
2324             pp->initialized == INIT_REMOVED)
2325                 return 0;
2326
2327         if (pp->tick)
2328                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
2329         if (pp->tick)
2330                 return 0; /* don't check this path yet */
2331
2332         conf = get_multipath_config();
2333         retrigger_tries = conf->retrigger_tries;
2334         checkint = conf->checkint;
2335         max_checkint = conf->max_checkint;
2336         marginal_pathgroups = conf->marginal_pathgroups;
2337         put_multipath_config(conf);
2338
2339         if (pp->checkint == CHECKINT_UNDEF) {
2340                 condlog(0, "%s: BUG: checkint is not set", pp->dev);
2341                 pp->checkint = checkint;
2342         };
2343
2344         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV) {
2345                 if (pp->retriggers < retrigger_tries) {
2346                         static const char change[] = "change";
2347                         ssize_t ret;
2348
2349                         condlog(2, "%s: triggering change event to reinitialize",
2350                                 pp->dev);
2351                         pp->initialized = INIT_REQUESTED_UDEV;
2352                         pp->retriggers++;
2353                         ret = sysfs_attr_set_value(pp->udev, "uevent", change,
2354                                                    sizeof(change) - 1);
2355                         if (ret != sizeof(change) - 1)
2356                                 log_sysfs_attr_set_value(1, ret,
2357                                                          "%s: failed to trigger change event",
2358                                                          pp->dev);
2359                         return 0;
2360                 } else {
2361                         condlog(1, "%s: not initialized after %d udev retriggers",
2362                                 pp->dev, retrigger_tries);
2363                         /*
2364                          * Make sure that the "add missing path" code path
2365                          * below may reinstate the path later, if it ever
2366                          * comes up again.
2367                          * The WWID needs not be cleared; if it was set, the
2368                          * state hadn't been INIT_MISSING_UDEV in the first
2369                          * place.
2370                          */
2371                         pp->initialized = INIT_FAILED;
2372                         return 0;
2373                 }
2374         }
2375
2376         /*
2377          * provision a next check soonest,
2378          * in case we exit abnormally from here
2379          */
2380         pp->tick = checkint;
2381
2382         newstate = path_offline(pp);
2383         if (newstate == PATH_UP) {
2384                 conf = get_multipath_config();
2385                 pthread_cleanup_push(put_multipath_config, conf);
2386                 newstate = get_state(pp, conf, 1, newstate);
2387                 pthread_cleanup_pop(1);
2388         } else {
2389                 checker_clear_message(&pp->checker);
2390                 condlog(3, "%s: state %s, checker not called",
2391                         pp->dev, checker_state_name(newstate));
2392         }
2393         /*
2394          * Wait for uevent for removed paths;
2395          * some LLDDs like zfcp keep paths unavailable
2396          * without sending uevents.
2397          */
2398         if (newstate == PATH_REMOVED)
2399                 newstate = PATH_DOWN;
2400
2401         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
2402                 condlog(2, "%s: unusable path (%s) - checker failed",
2403                         pp->dev, checker_state_name(newstate));
2404                 LOG_MSG(2, pp);
2405                 conf = get_multipath_config();
2406                 pthread_cleanup_push(put_multipath_config, conf);
2407                 pathinfo(pp, conf, 0);
2408                 pthread_cleanup_pop(1);
2409                 return 1;
2410         } else if ((newstate != PATH_UP && newstate != PATH_GHOST &&
2411                     newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) {
2412                 /* If path state become failed again cancel path delay state */
2413                 pp->state = newstate;
2414                 /*
2415                  * path state bad again should change the check interval time
2416                  * to the shortest delay
2417                  */
2418                 pp->checkint = checkint;
2419                 return 1;
2420         }
2421         if (!pp->mpp) {
2422                 if (!strlen(pp->wwid) &&
2423                     (pp->initialized == INIT_FAILED ||
2424                      pp->initialized == INIT_NEW) &&
2425                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
2426                         condlog(2, "%s: add missing path", pp->dev);
2427                         conf = get_multipath_config();
2428                         pthread_cleanup_push(put_multipath_config, conf);
2429                         ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
2430                         pthread_cleanup_pop(1);
2431                         /* INIT_OK implies ret == PATHINFO_OK */
2432                         if (pp->initialized == INIT_OK) {
2433                                 ev_add_path(pp, vecs, 1);
2434                                 pp->tick = 1;
2435                         } else {
2436                                 if (ret == PATHINFO_SKIPPED)
2437                                         return -1;
2438                                 /*
2439                                  * We failed multiple times to initialize this
2440                                  * path properly. Don't re-check too often.
2441                                  */
2442                                 pp->checkint = max_checkint;
2443                         }
2444                 }
2445                 return 0;
2446         }
2447         /*
2448          * Async IO in flight. Keep the previous path state
2449          * and reschedule as soon as possible
2450          */
2451         if (newstate == PATH_PENDING) {
2452                 pp->tick = 1;
2453                 return 0;
2454         }
2455         /*
2456          * Synchronize with kernel state
2457          */
2458         ret = update_multipath_strings(pp->mpp, vecs->pathvec);
2459         if (ret != DMP_OK) {
2460                 if (ret == DMP_NOT_FOUND) {
2461                         /* multipath device missing. Likely removed */
2462                         condlog(1, "%s: multipath device '%s' not found",
2463                                 pp->dev, pp->mpp ? pp->mpp->alias : "");
2464                         return 0;
2465                 } else
2466                         condlog(1, "%s: Couldn't synchronize with kernel state",
2467                                 pp->dev);
2468                 pp->dmstate = PSTATE_UNDEF;
2469         }
2470         /* if update_multipath_strings orphaned the path, quit early */
2471         if (!pp->mpp)
2472                 return 0;
2473         set_no_path_retry(pp->mpp);
2474
2475         if (pp->recheck_wwid == RECHECK_WWID_ON &&
2476             (newstate == PATH_UP || newstate == PATH_GHOST) &&
2477             ((pp->state != PATH_UP && pp->state != PATH_GHOST) ||
2478              pp->dmstate == PSTATE_FAILED) &&
2479             check_path_wwid_change(pp)) {
2480                 condlog(0, "%s: path wwid change detected. Removing", pp->dev);
2481                 handle_path_wwid_change(pp, vecs);
2482                 return 0;
2483         }
2484
2485         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
2486             (san_path_check_enabled(pp->mpp) ||
2487              marginal_path_check_enabled(pp->mpp))) {
2488                 if (should_skip_path(pp)) {
2489                         if (!pp->marginal && pp->state != PATH_DELAYED)
2490                                 condlog(2, "%s: path is now marginal", pp->dev);
2491                         if (!marginal_pathgroups) {
2492                                 if (marginal_path_check_enabled(pp->mpp))
2493                                         /* to reschedule as soon as possible,
2494                                          * so that this path can be recovered
2495                                          * in time */
2496                                         pp->tick = 1;
2497                                 pp->state = PATH_DELAYED;
2498                                 return 1;
2499                         }
2500                         if (!pp->marginal) {
2501                                 pp->marginal = 1;
2502                                 marginal_changed = 1;
2503                         }
2504                 } else {
2505                         if (pp->marginal || pp->state == PATH_DELAYED)
2506                                 condlog(2, "%s: path is no longer marginal",
2507                                         pp->dev);
2508                         if (marginal_pathgroups && pp->marginal) {
2509                                 pp->marginal = 0;
2510                                 marginal_changed = 1;
2511                         }
2512                 }
2513         }
2514
2515         /*
2516          * don't reinstate failed path, if its in stand-by
2517          * and if target supports only implicit tpgs mode.
2518          * this will prevent unnecessary i/o by dm on stand-by
2519          * paths if there are no other active paths in map.
2520          */
2521         disable_reinstate = (newstate == PATH_GHOST &&
2522                              count_active_paths(pp->mpp) == 0 &&
2523                              path_get_tpgs(pp) == TPGS_IMPLICIT) ? 1 : 0;
2524
2525         pp->chkrstate = newstate;
2526         if (newstate != pp->state) {
2527                 int oldstate = pp->state;
2528                 pp->state = newstate;
2529
2530                 LOG_MSG(1, pp);
2531
2532                 /*
2533                  * upon state change, reset the checkint
2534                  * to the shortest delay
2535                  */
2536                 conf = get_multipath_config();
2537                 pp->checkint = conf->checkint;
2538                 put_multipath_config(conf);
2539
2540                 if (newstate != PATH_UP && newstate != PATH_GHOST) {
2541                         /*
2542                          * proactively fail path in the DM
2543                          */
2544                         if (oldstate == PATH_UP ||
2545                             oldstate == PATH_GHOST)
2546                                 fail_path(pp, 1);
2547                         else
2548                                 fail_path(pp, 0);
2549
2550                         /*
2551                          * cancel scheduled failback
2552                          */
2553                         pp->mpp->failback_tick = 0;
2554
2555                         pp->mpp->stat_path_failures++;
2556                         return 1;
2557                 }
2558
2559                 if (newstate == PATH_UP || newstate == PATH_GHOST) {
2560                         if (pp->mpp->prflag != PRFLAG_UNSET) {
2561                                 int prflag = pp->mpp->prflag;
2562                                 /*
2563                                  * Check Persistent Reservation.
2564                                  */
2565                                 condlog(2, "%s: checking persistent "
2566                                         "reservation registration", pp->dev);
2567                                 mpath_pr_event_handle(pp);
2568                                 if (pp->mpp->prflag == PRFLAG_SET &&
2569                                     prflag != PRFLAG_SET)
2570                                         pr_register_active_paths(pp->mpp);
2571                         }
2572                 }
2573
2574                 /*
2575                  * reinstate this path
2576                  */
2577                 if (!disable_reinstate)
2578                         reinstate_path(pp);
2579                 new_path_up = 1;
2580
2581                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
2582                         chkr_new_path_up = 1;
2583
2584                 /*
2585                  * if at least one path is up in a group, and
2586                  * the group is disabled, re-enable it
2587                  */
2588                 if (newstate == PATH_UP)
2589                         enable_group(pp);
2590         }
2591         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
2592                 if ((pp->dmstate == PSTATE_FAILED ||
2593                     pp->dmstate == PSTATE_UNDEF) &&
2594                     !disable_reinstate)
2595                         /* Clear IO errors */
2596                         reinstate_path(pp);
2597                 else {
2598                         LOG_MSG(4, pp);
2599                         if (pp->checkint != max_checkint) {
2600                                 /*
2601                                  * double the next check delay.
2602                                  * max at conf->max_checkint
2603                                  */
2604                                 if (pp->checkint < (max_checkint / 2))
2605                                         pp->checkint = 2 * pp->checkint;
2606                                 else
2607                                         pp->checkint = max_checkint;
2608
2609                                 condlog(4, "%s: delay next check %is",
2610                                         pp->dev_t, pp->checkint);
2611                         }
2612                         pp->tick = pp->checkint;
2613                 }
2614         }
2615         else if (newstate != PATH_UP && newstate != PATH_GHOST) {
2616                 if (pp->dmstate == PSTATE_ACTIVE ||
2617                     pp->dmstate == PSTATE_UNDEF)
2618                         fail_path(pp, 0);
2619                 if (newstate == PATH_DOWN) {
2620                         int log_checker_err;
2621
2622                         conf = get_multipath_config();
2623                         log_checker_err = conf->log_checker_err;
2624                         put_multipath_config(conf);
2625                         if (log_checker_err == LOG_CHKR_ERR_ONCE)
2626                                 LOG_MSG(3, pp);
2627                         else
2628                                 LOG_MSG(2, pp);
2629                 }
2630         }
2631
2632         pp->state = newstate;
2633
2634         if (pp->mpp->wait_for_udev)
2635                 return 1;
2636         /*
2637          * path prio refreshing
2638          */
2639         condlog(4, "path prio refresh");
2640
2641         if (marginal_changed) {
2642                 update_prio(pp, 1);
2643                 reload_and_sync_map(pp->mpp, vecs);
2644         } else if (update_prio(pp, new_path_up) &&
2645                    pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio &&
2646                    pp->mpp->pgfailback == -FAILBACK_IMMEDIATE) {
2647                 condlog(2, "%s: path priorities changed. reloading",
2648                         pp->mpp->alias);
2649                 reload_and_sync_map(pp->mpp, vecs);
2650         } else if (need_switch_pathgroup(pp->mpp, &need_reload)) {
2651                 if (pp->mpp->pgfailback > 0 &&
2652                     (new_path_up || pp->mpp->failback_tick <= 0))
2653                         pp->mpp->failback_tick = pp->mpp->pgfailback + 1;
2654                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
2655                          (chkr_new_path_up && followover_should_failback(pp))) {
2656                         if (need_reload)
2657                                 reload_and_sync_map(pp->mpp, vecs);
2658                         else
2659                                 switch_pathgroup(pp->mpp);
2660                 }
2661         }
2662         return 1;
2663 }
2664
/*
 * Progress of one pass over vecs->pathvec in checkerloop().
 */
enum checker_state {
	CHECKER_STARTING,	/* fresh pass: per-path is_checked flags get reset */
	CHECKER_RUNNING,	/* pass in progress; may drop the lock and resume */
	CHECKER_FINISHED,	/* every path has been checked this pass */
};
2670
/*
 * Main path-checker thread.
 *
 * Each iteration of the outer loop is one "tick": run check_path() on
 * every path in vecs->pathvec (dropping the vecs lock periodically so
 * uevent processing and CLI clients aren't starved), then do per-tick
 * housekeeping (failback, retries, ghost delay, map garbage collection)
 * and sleep until the next tick.  Also pings the systemd watchdog when
 * enabled.  Runs until set_config_state() reports daemon shutdown or
 * the thread is cancelled.
 */
static void *
checkerloop (void *ap)
{
	struct vectors *vecs;
	struct path *pp;
	int count = 0;			/* ticks until next map garbage collection */
	struct timespec last_time;
	struct config *conf;
	int foreign_tick = 0;		/* ticks until next foreign-library check */
#ifdef USE_SYSTEMD
	bool use_watchdog;
#endif

	pthread_cleanup_push(rcu_unregister, NULL);
	rcu_register_thread();
	mlockall(MCL_CURRENT | MCL_FUTURE);
	vecs = (struct vectors *)ap;

	/* Tweak start time for initial path check */
	get_monotonic_time(&last_time);
	last_time.tv_sec -= 1;

	/* use_watchdog is set from process environment and never changes */
	conf = get_multipath_config();
#ifdef USE_SYSTEMD
	use_watchdog = conf->use_watchdog;
#endif
	put_multipath_config(conf);

	while (1) {
		struct timespec diff_time, start_time, end_time;
		int num_paths = 0, strict_timing, rc = 0, i = 0;
		unsigned int ticks = 0;
		enum checker_state checker_state = CHECKER_STARTING;

		if (set_config_state(DAEMON_RUNNING) != DAEMON_RUNNING)
			/* daemon shutdown */
			break;

		/* seconds elapsed since the previous tick */
		get_monotonic_time(&start_time);
		timespecsub(&start_time, &last_time, &diff_time);
		condlog(4, "tick (%ld.%06lu secs)",
			(long)diff_time.tv_sec, diff_time.tv_nsec / 1000);
		last_time = start_time;
		ticks = diff_time.tv_sec;
#ifdef USE_SYSTEMD
		if (use_watchdog)
			sd_notify(0, "WATCHDOG=1");
#endif
		/* one full pass over pathvec, possibly in several locked chunks */
		while (checker_state != CHECKER_FINISHED) {
			unsigned int paths_checked = 0;
			struct timespec chk_start_time;

			pthread_cleanup_push(cleanup_lock, &vecs->lock);
			lock(&vecs->lock);
			pthread_testcancel();
			get_monotonic_time(&chk_start_time);
			if (checker_state == CHECKER_STARTING) {
				vector_foreach_slot(vecs->pathvec, pp, i)
					pp->is_checked = false;
				i = 0;
				checker_state = CHECKER_RUNNING;
			} else {
				/*
				 * Paths could have been removed since we
				 * dropped the lock. Find the path to continue
				 * checking at. Since paths can be removed from
				 * anywhere in the vector, but can only be added
				 * at the end, the last checked path must be
				 * between its old location, and the start or
				 * the vector.
				 */
				if (i >= VECTOR_SIZE(vecs->pathvec))
					i = VECTOR_SIZE(vecs->pathvec) - 1;
				while ((pp = VECTOR_SLOT(vecs->pathvec, i))) {
					if (pp->is_checked == true)
						break;
					i--;
				}
				i++;
			}
			vector_foreach_slot_after (vecs->pathvec, pp, i) {
				pp->is_checked = true;
				rc = check_path(vecs, pp, ticks);
				if (rc < 0) {
					/* checker asked for this path's removal */
					condlog(1, "%s: check_path() failed, removing",
						pp->dev);
					vector_del_slot(vecs->pathvec, i);
					free_path(pp);
					i--;
				} else
					num_paths += rc;
				/*
				 * Every 128 paths, if someone is waiting for
				 * the lock and this chunk has been holding it
				 * for over a second, release it and resume
				 * later from index i.
				 */
				if (++paths_checked % 128 == 0 &&
				    (lock_has_waiters(&vecs->lock) ||
				     waiting_clients())) {
					get_monotonic_time(&end_time);
					timespecsub(&end_time, &chk_start_time,
						    &diff_time);
					if (diff_time.tv_sec > 0)
						goto unlock;
				}
			}
			checker_state = CHECKER_FINISHED;
unlock:
			lock_cleanup_pop(vecs->lock);
			if (checker_state != CHECKER_FINISHED) {
				/* Yield to waiters */
				struct timespec wait = { .tv_nsec = 10000, };
				nanosleep(&wait, NULL);
			}
		}

		/* per-tick map housekeeping, under the vecs lock */
		pthread_cleanup_push(cleanup_lock, &vecs->lock);
		lock(&vecs->lock);
		pthread_testcancel();
		deferred_failback_tick(vecs);
		retry_count_tick(vecs->mpvec);
		missing_uev_wait_tick(vecs);
		ghost_delay_tick(vecs);
		partial_retrigger_tick(vecs->pathvec);
		lock_cleanup_pop(vecs->lock);

		/* garbage-collect orphaned maps every MAPGCINT ticks */
		if (count)
			count--;
		else {
			pthread_cleanup_push(cleanup_lock, &vecs->lock);
			lock(&vecs->lock);
			pthread_testcancel();
			condlog(4, "map garbage collection");
			mpvec_garbage_collector(vecs);
			count = MAPGCINT;
			lock_cleanup_pop(vecs->lock);
		}

		get_monotonic_time(&end_time);
		timespecsub(&end_time, &start_time, &diff_time);
		if (num_paths) {
			unsigned int max_checkint;

			condlog(4, "checked %d path%s in %ld.%06lu secs",
				num_paths, num_paths > 1 ? "s" : "",
				(long)diff_time.tv_sec,
				diff_time.tv_nsec / 1000);
			conf = get_multipath_config();
			max_checkint = conf->max_checkint;
			put_multipath_config(conf);
			if (diff_time.tv_sec > (time_t)max_checkint)
				condlog(1, "path checkers took longer "
					"than %ld seconds, consider "
					"increasing max_polling_interval",
					(long)diff_time.tv_sec);
		}

		/* poll foreign path handlers every max_checkint ticks */
		if (foreign_tick == 0) {
			conf = get_multipath_config();
			foreign_tick = conf->max_checkint;
			put_multipath_config(conf);
		}
		if (--foreign_tick == 0)
			check_foreign();

		post_config_state(DAEMON_IDLE);
		conf = get_multipath_config();
		strict_timing = conf->strict_timing;
		put_multipath_config(conf);
		if (!strict_timing)
			sleep(1);
		else {
			/* sleep the remainder of the 1-second tick */
			diff_time.tv_sec = 0;
			diff_time.tv_nsec =
			     1000UL * 1000 * 1000 - diff_time.tv_nsec;
			normalize_timespec(&diff_time);

			condlog(3, "waiting for %ld.%06lu secs",
				(long)diff_time.tv_sec,
				diff_time.tv_nsec / 1000);
			if (nanosleep(&diff_time, NULL) != 0) {
				/* fall back to relaxed timing on failure */
				condlog(3, "nanosleep failed with error %d",
					errno);
				conf = get_multipath_config();
				conf->strict_timing = 0;
				put_multipath_config(conf);
				break;
			}
		}
	}
	pthread_cleanup_pop(1);
	return NULL;
}
2860
/*
 * Build the daemon's path and map state from scratch.
 *
 * Discovers paths from sysfs and maps from device-mapper, filters
 * blacklisted paths, coalesces paths into maps according to
 * reload_type, removes stale maps, and starts dm event waiters for
 * the resulting map set (stored in vecs->mpvec).
 *
 * Returns 0 on success, 1 on failure (the partially built new map
 * vector is freed; vecs keeps whatever state it already had).
 */
static int
configure (struct vectors * vecs, enum force_reload_types reload_type)
{
	struct multipath * mpp;
	struct path * pp;
	vector mpvec;
	int i, ret;
	struct config *conf;

	if (!vecs->pathvec && !(vecs->pathvec = vector_alloc())) {
		condlog(0, "couldn't allocate path vec in configure");
		return 1;
	}

	if (!vecs->mpvec && !(vecs->mpvec = vector_alloc())) {
		condlog(0, "couldn't allocate multipath vec in configure");
		return 1;
	}

	if (!(mpvec = vector_alloc())) {
		condlog(0, "couldn't allocate new maps vec in configure");
		return 1;
	}

	/*
	 * probe for current path (from sysfs) and map (from dm) sets
	 */
	ret = path_discovery(vecs->pathvec, DI_ALL);
	if (ret < 0) {
		condlog(0, "configure failed at path discovery");
		goto fail;
	}

	if (should_exit())
		goto fail;

	/* drop paths excluded by the blacklist configuration */
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	vector_foreach_slot (vecs->pathvec, pp, i){
		if (filter_path(conf, pp) > 0){
			vector_del_slot(vecs->pathvec, i);
			free_path(pp);
			i--;
		}
	}
	pthread_cleanup_pop(1);

	if (map_discovery(vecs)) {
		condlog(0, "configure failed at map discovery");
		goto fail;
	}

	if (should_exit())
		goto fail;

	/* group paths into maps; new maps land in mpvec */
	ret = coalesce_paths(vecs, mpvec, NULL, reload_type, CMD_NONE);
	if (ret != CP_OK) {
		condlog(0, "configure failed while coalescing paths");
		goto fail;
	}

	if (should_exit())
		goto fail;

	/*
	 * may need to remove some maps which are no longer relevant
	 * e.g., due to blacklist changes in conf file
	 */
	if (coalesce_maps(vecs, mpvec)) {
		condlog(0, "configure failed while coalescing maps");
		goto fail;
	}

	if (should_exit())
		goto fail;

	sync_maps_state(mpvec);
	vector_foreach_slot(mpvec, mpp, i){
		/* newly seen wwid: let udev re-evaluate its paths */
		if (remember_wwid(mpp->wwid) == 1)
			trigger_paths_udev_change(mpp, true);
		update_map_pr(mpp);
		if (mpp->prflag == PRFLAG_SET)
			pr_register_active_paths(mpp);
	}

	/*
	 * purge dm of old maps and save new set of maps formed by
	 * considering current path state
	 */
	remove_maps(vecs);
	vecs->mpvec = mpvec;

	/*
	 * start dm event waiter threads for these new maps
	 */
	vector_foreach_slot(vecs->mpvec, mpp, i) {
		if (wait_for_events(mpp, vecs)) {
			remove_map(mpp, vecs->pathvec, vecs->mpvec);
			i--;
			continue;
		}
		if (setup_multipath(vecs, mpp))
			i--;
	}
	return 0;

fail:
	vector_free(mpvec);
	return 1;
}
2971
2972 int
2973 need_to_delay_reconfig(struct vectors * vecs)
2974 {
2975         struct multipath *mpp;
2976         int i;
2977
2978         if (!VECTOR_SIZE(vecs->mpvec))
2979                 return 0;
2980
2981         vector_foreach_slot(vecs->mpvec, mpp, i) {
2982                 if (mpp->wait_for_udev)
2983                         return 1;
2984         }
2985         return 0;
2986 }
2987
2988 void rcu_free_config(struct rcu_head *head)
2989 {
2990         struct config *conf = container_of(head, struct config, rcu);
2991
2992         free_config(conf);
2993 }
2994
2995 static bool reconfigure_check_uid_attrs(const struct _vector *old_attrs,
2996                                         const struct _vector *new_attrs)
2997 {
2998         int i;
2999         char *old;
3000
3001         if (VECTOR_SIZE(old_attrs) != VECTOR_SIZE(new_attrs))
3002                 return true;
3003
3004         vector_foreach_slot(old_attrs, old, i) {
3005                 char *new = VECTOR_SLOT(new_attrs, i);
3006
3007                 if (strcmp(old, new))
3008                         return true;
3009         }
3010
3011         return false;
3012 }
3013
/*
 * Reconcile a freshly loaded configuration with the running one,
 * carrying over settings that (per the log messages below) cannot be
 * changed without restarting multipathd.
 */
static void reconfigure_check(struct config *old, struct config *new)
{
	int old_marginal_pathgroups;

	/* fpin-based marginal pathgroups can't be toggled at runtime;
	 * keep the previous setting and warn. */
	old_marginal_pathgroups = old->marginal_pathgroups;
	if ((old_marginal_pathgroups == MARGINAL_PATHGROUP_FPIN) !=
	    (new->marginal_pathgroups == MARGINAL_PATHGROUP_FPIN)) {
		condlog(1, "multipathd must be restarted to turn %s fpin marginal paths",
			(old_marginal_pathgroups == MARGINAL_PATHGROUP_FPIN)?
			"off" : "on");
		new->marginal_pathgroups = old_marginal_pathgroups;
	}

	/* uid_attrs can't be changed at runtime either: discard the new
	 * vector's strings and move the old vector into the new config. */
	if (reconfigure_check_uid_attrs(&old->uid_attrs, &new->uid_attrs)) {
		int i;
		void *ptr;

		condlog(1, "multipathd must be restarted to change uid_attrs, keeping old values");
		vector_foreach_slot(&new->uid_attrs, ptr, i)
			free(ptr);
		vector_reset(&new->uid_attrs);
		/* shallow-copy the vector; new now owns old's slots */
		new->uid_attrs = old->uid_attrs;

		/* avoid uid_attrs being freed in rcu_free_config() */
		old->uid_attrs.allocated = 0;
		old->uid_attrs.slot = NULL;
	}
}
3042
3043 static int
3044 reconfigure (struct vectors *vecs, enum force_reload_types reload_type)
3045 {
3046         struct config * old, *conf;
3047
3048         conf = load_config(DEFAULT_CONFIGFILE);
3049         if (!conf)
3050                 return 1;
3051
3052         if (verbosity)
3053                 libmp_verbosity = verbosity;
3054         setlogmask(LOG_UPTO(libmp_verbosity + 3));
3055         condlog(2, "%s: setting up paths and maps", __func__);
3056
3057         /*
3058          * free old map and path vectors ... they use old conf state
3059          */
3060         if (VECTOR_SIZE(vecs->mpvec))
3061                 remove_maps_and_stop_waiters(vecs);
3062
3063         free_pathvec(vecs->pathvec, FREE_PATHS);
3064         vecs->pathvec = NULL;
3065         delete_all_foreign();
3066
3067         reset_checker_classes();
3068         if (bindings_read_only)
3069                 conf->bindings_read_only = bindings_read_only;
3070
3071         if (check_alias_settings(conf))
3072                 return 1;
3073
3074         uxsock_timeout = conf->uxsock_timeout;
3075
3076         old = rcu_dereference(multipath_conf);
3077         reconfigure_check(old, conf);
3078
3079         conf->sequence_nr = old->sequence_nr + 1;
3080         rcu_assign_pointer(multipath_conf, conf);
3081         call_rcu(&old->rcu, rcu_free_config);
3082 #ifdef FPIN_EVENT_HANDLER
3083         fpin_clean_marginal_dev_list(NULL);
3084 #endif
3085         configure(vecs, reload_type);
3086
3087         return 0;
3088 }
3089
3090 static struct vectors *
3091 init_vecs (void)
3092 {
3093         struct vectors * vecs;
3094
3095         vecs = (struct vectors *)calloc(1, sizeof(struct vectors));
3096
3097         if (!vecs)
3098                 return NULL;
3099
3100         init_lock(&vecs->lock);
3101
3102         return vecs;
3103 }
3104
3105 static void *
3106 signal_set(int signo, void (*func) (int))
3107 {
3108         int r;
3109         struct sigaction sig;
3110         struct sigaction osig;
3111
3112         sig.sa_handler = func;
3113         sigemptyset(&sig.sa_mask);
3114         sig.sa_flags = 0;
3115
3116         r = sigaction(signo, &sig, &osig);
3117
3118         if (r < 0)
3119                 return (SIG_ERR);
3120         else
3121                 return (osig.sa_handler);
3122 }
3123
3124 void
3125 handle_signals(bool nonfatal)
3126 {
3127         if (exit_sig) {
3128                 condlog(3, "exit (signal)");
3129                 exit_sig = 0;
3130                 exit_daemon();
3131         }
3132         if (!nonfatal)
3133                 return;
3134         if (reconfig_sig) {
3135                 condlog(3, "reconfigure (signal)");
3136                 schedule_reconfigure(FORCE_RELOAD_WEAK);
3137         }
3138         if (log_reset_sig) {
3139                 condlog(3, "reset log (signal)");
3140                 if (logsink == LOGSINK_SYSLOG)
3141                         log_thread_reset();
3142         }
3143         reconfig_sig = 0;
3144         log_reset_sig = 0;
3145 }
3146
/* SIGHUP handler: flag a reconfigure request, acted on in handle_signals(). */
static void
sighup(__attribute__((unused)) int sig)
{
	reconfig_sig = 1;
}
3152
/* SIGINT/SIGTERM/SIGPIPE handler: flag a daemon exit request. */
static void
sigend(__attribute__((unused)) int sig)
{
	exit_sig = 1;
}
3158
/* SIGUSR1 handler: flag a log-reset request, acted on in handle_signals(). */
static void
sigusr1(__attribute__((unused)) int sig)
{
	log_reset_sig = 1;
}
3164
/* SIGUSR2 handler: logged but otherwise a no-op. */
static void
sigusr2(__attribute__((unused)) int sig)
{
	condlog(3, "SIGUSR2 received");
}
3170
3171 static void
3172 signal_init(void)
3173 {
3174         sigset_t set;
3175
3176         /* block all signals */
3177         sigfillset(&set);
3178         /* SIGPIPE occurs if logging fails */
3179         sigdelset(&set, SIGPIPE);
3180         pthread_sigmask(SIG_SETMASK, &set, NULL);
3181
3182         /* Other signals will be unblocked in the uxlsnr thread */
3183         signal_set(SIGHUP, sighup);
3184         signal_set(SIGUSR1, sigusr1);
3185         signal_set(SIGUSR2, sigusr2);
3186         signal_set(SIGINT, sigend);
3187         signal_set(SIGTERM, sigend);
3188         signal_set(SIGPIPE, sigend);
3189 }
3190
/*
 * Request real-time round-robin scheduling (SCHED_RR, priority 99)
 * for the daemon.  Failure is logged but not fatal.
 */
static void
setscheduler (void)
{
	int res;
	static struct sched_param sched_param = {
		.sched_priority = 99
	};

	res = sched_setscheduler (0, SCHED_RR, &sched_param);

	if (res == -1)
		/*
		 * NOTE(review): condlog() elsewhere in this file takes a
		 * verbosity level (0-4); LOG_WARNING here is a syslog
		 * priority constant (4) — confirm this is the intended
		 * log level.
		 */
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
	return;
}
3205
/*
 * Make the daemon maximally unattractive to the OOM killer.
 *
 * If systemd already provided OOMScoreAdjust, respect it.  Otherwise
 * write the minimum adjustment to the current /proc interface
 * (oom_score_adj), falling back to the legacy one (oom_adj).
 *
 * Fix over the previous version: the results of fprintf()/fclose()
 * are now checked, so a failed write (e.g. EPERM, short write on a
 * /proc file) no longer silently passes for success and the fallback
 * interface gets a chance to be tried.
 */
static void set_oom_adj(void)
{
	FILE *fp;

	if (getenv("OOMScoreAdjust")) {
		condlog(3, "Using systemd provided OOMScoreAdjust");
		return;
	}
#ifdef OOM_SCORE_ADJ_MIN
	fp = fopen("/proc/self/oom_score_adj", "w");
	if (fp) {
		/* only trust the write if both fprintf and fclose succeed */
		int written = fprintf(fp, "%i", OOM_SCORE_ADJ_MIN) > 0;

		if (fclose(fp) == 0 && written)
			return;
	}
#endif
	fp = fopen("/proc/self/oom_adj", "w");
	if (fp) {
		int written = fprintf(fp, "%i", OOM_ADJUST_MIN) > 0;

		if (fclose(fp) == 0 && written)
			return;
	}
	condlog(0, "couldn't adjust oom score");
}
3230
/* Release the pid file on shutdown: close its fd (if open) and unlink it. */
static void cleanup_pidfile(void)
{
	if (pid_fd >= 0)
		close(pid_fd);
	condlog(3, "unlink pidfile");
	unlink(DEFAULT_PIDFILE);
}
3238
3239 static void cleanup_conf(void) {
3240         struct config *conf;
3241
3242         conf = rcu_dereference(multipath_conf);
3243         if (!conf)
3244                 return;
3245         rcu_assign_pointer(multipath_conf, NULL);
3246         call_rcu(&conf->rcu, rcu_free_config);
3247 }
3248
3249 static void cleanup_maps(struct vectors *vecs)
3250 {
3251         int queue_without_daemon, i;
3252         struct multipath *mpp;
3253         struct config *conf;
3254
3255         conf = get_multipath_config();
3256         queue_without_daemon = conf->queue_without_daemon;
3257         put_multipath_config(conf);
3258         if (queue_without_daemon == QUE_NO_DAEMON_OFF)
3259                 vector_foreach_slot(vecs->mpvec, mpp, i)
3260                         dm_queue_if_no_path(mpp->alias, 0);
3261         remove_maps_and_stop_waiters(vecs);
3262         vecs->mpvec = NULL;
3263 }
3264
/* Free all discovered paths on daemon shutdown. */
static void cleanup_paths(struct vectors *vecs)
{
	free_pathvec(vecs->pathvec, FREE_PATHS);
	vecs->pathvec = NULL;
}
3270
/* Final teardown of the global vectors container (maps, paths, lock). */
static void cleanup_vecs(void)
{
	if (!gvecs)
		return;
	/*
	 * We can't take the vecs lock here, because exit() may
	 * have been called from the child() thread, holding the lock already.
	 * Anyway, by the time we get here, all threads that might access
	 * vecs should have been joined already (in cleanup_threads).
	 */
	cleanup_maps(gvecs);
	cleanup_paths(gvecs);
	pthread_mutex_destroy(&gvecs->lock.mutex);
	free(gvecs);
	gvecs = NULL;
}
3287
/*
 * Stop all daemon worker threads during shutdown.
 *
 * The io_err_stat thread is stopped via its dedicated helper; the
 * rest are cancelled in one pass and then joined in a second pass,
 * so all cancellations can proceed concurrently.
 */
static void cleanup_threads(void)
{
	stop_io_err_stat_thread();

	/* first pass: request cancellation of every started thread */
	if (check_thr_started)
		pthread_cancel(check_thr);
	if (uevent_thr_started)
		pthread_cancel(uevent_thr);
	if (uxlsnr_thr_started)
		pthread_cancel(uxlsnr_thr);
	if (uevq_thr_started)
		pthread_cancel(uevq_thr);
	if (dmevent_thr_started)
		pthread_cancel(dmevent_thr);
	if (fpin_thr_started)
		pthread_cancel(fpin_thr);
	if (fpin_consumer_thr_started)
		pthread_cancel(fpin_consumer_thr);


	/* second pass: wait for each of them to terminate */
	if (check_thr_started)
		pthread_join(check_thr, NULL);
	if (uevent_thr_started)
		pthread_join(uevent_thr, NULL);
	if (uxlsnr_thr_started)
		pthread_join(uxlsnr_thr, NULL);
	if (uevq_thr_started)
		pthread_join(uevq_thr, NULL);
	if (dmevent_thr_started)
		pthread_join(dmevent_thr, NULL);
	if (fpin_thr_started)
		pthread_join(fpin_thr, NULL);
	if (fpin_consumer_thr_started)
		pthread_join(fpin_consumer_thr, NULL);


	/*
	 * As all threads are joined now, and we're in DAEMON_SHUTDOWN
	 * state, no new waiter threads will be created any more.
	 */
	pthread_attr_destroy(&waiter_attr);
}
3330
3331 #ifndef URCU_VERSION
3332 #  define URCU_VERSION 0
3333 #endif
3334 #if (URCU_VERSION >= 0x000800)
3335 /*
3336  * Use a non-default call_rcu_data for child().
3337  *
3338  * We do this to avoid a memory leak from liburcu.
3339  * liburcu never frees the default rcu handler (see comments on
3340  * call_rcu_data_free() in urcu-call-rcu-impl.h), its thread
3341  * can't be joined with pthread_join(), leaving a memory leak.
3342  *
3343  * Therefore we create our own, which can be destroyed and joined.
3344  * The cleanup handler needs to call rcu_barrier(), which is only
3345  * available in user-space RCU v0.8 and newer. See
3346  * https://lists.lttng.org/pipermail/lttng-dev/2021-May/029958.html
3347  */
3348 static struct call_rcu_data *setup_rcu(void)
3349 {
3350         struct call_rcu_data *crdp;
3351
3352         rcu_init();
3353         rcu_register_thread();
3354         crdp = create_call_rcu_data(0UL, -1);
3355         if (crdp != NULL)
3356                 set_thread_call_rcu_data(crdp);
3357         return crdp;
3358 }
3359
3360 static struct call_rcu_data *mp_rcu_data;
3361
/*
 * Exit handler: tear down the private call_rcu worker created by
 * setup_rcu(), so its thread can be joined and no memory is leaked.
 * The ordering below (detach, synchronize, free, join) follows the
 * liburcu requirements for call_rcu_data_free().
 */
static void cleanup_rcu(void)
{
	pthread_t rcu_thread;

	/* Wait for any pending RCU calls */
	rcu_barrier();
	if (mp_rcu_data != NULL) {
		rcu_thread = get_call_rcu_thread(mp_rcu_data);
		/* detach this thread from the RCU thread */
		set_thread_call_rcu_data(NULL);
		synchronize_rcu();
		/* tell RCU thread to exit */
		call_rcu_data_free(mp_rcu_data);
		pthread_join(rcu_thread, NULL);
	}
	rcu_unregister_thread();
}
3379 #endif /* URCU_VERSION */
3380
/*
 * Central atexit() handler of the daemon process, registered in child().
 * Teardown order matters: worker threads are joined first, then the
 * data structures they used; the log thread is stopped late so the
 * earlier cleanup steps can still emit condlog() messages.
 */
static void cleanup_child(void)
{
	cleanup_threads();
	cleanup_vecs();
	cleanup_bindings();
	if (poll_dmevents)
		cleanup_dmevent_waiter();

	cleanup_pidfile();
	if (logsink == LOGSINK_SYSLOG)
		log_thread_stop();

	cleanup_conf();
}
3395
/*
 * Report the exit status to systemd via sd_notify() (only when built
 * with systemd support) and hand the status back unchanged so callers
 * can simply "return sd_notify_exit(code);".
 */
static int sd_notify_exit(int err)
{
#ifdef USE_SYSTEMD
	char errno_msg[24];

	snprintf(errno_msg, sizeof(errno_msg), "ERRNO=%d", err);
	sd_notify(0, errno_msg);
#endif
	return err;
}
3406
/*
 * Body of the main daemon process.
 *
 * Sets up logging, the pidfile, configuration, checkers and
 * prioritizers, then starts all worker threads (cli listener, uevent
 * listener/dispatcher, checker loop, optional dmevents and FPIN
 * threads) and loops servicing DAEMON_CONFIGURE requests until
 * DAEMON_SHUTDOWN is posted.  Returns the daemon's exit status; all
 * resource teardown happens in the cleanup_child()/cleanup_rcu()
 * atexit() handlers registered below.
 */
static int
child (__attribute__((unused)) void *param)
{
	pthread_attr_t log_attr, misc_attr, uevent_attr;
	struct vectors * vecs;
	int rc;
	struct config *conf;
	char *envp;
	enum daemon_status state;
	int exit_code = 1;
	int fpin_marginal_paths = 0;

	init_unwinder();
	/* keep the daemon resident; paging in during a path outage could hang */
	mlockall(MCL_CURRENT | MCL_FUTURE);
	signal_init();
#if (URCU_VERSION >= 0x000800)
	mp_rcu_data = setup_rcu();
	if (atexit(cleanup_rcu))
		fprintf(stderr, "failed to register RCU cleanup handler\n");
#else
	rcu_init();
#endif
	if (atexit(cleanup_child))
		fprintf(stderr, "failed to register cleanup handlers\n");

	setup_thread_attr(&misc_attr, 64 * 1024, 0);
	setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0);
	setup_thread_attr(&waiter_attr, 32 * 1024, 1);

	if (logsink == LOGSINK_SYSLOG) {
		setup_thread_attr(&log_attr, 64 * 1024, 0);
		log_thread_start(&log_attr);
		pthread_attr_destroy(&log_attr);
	}
	pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
	if (pid_fd < 0) {
		condlog(1, "failed to create pidfile");
		exit(1);
	}

	post_config_state(DAEMON_START);

	condlog(2, "multipathd v%d.%d.%d%s: start up",
		MULTIPATH_VERSION(VERSION_CODE), EXTRAVERSION);
	condlog(3, "read " DEFAULT_CONFIGFILE);

	/* -v on the command line overrides the config file's verbosity,
	 * both before and after load_config() */
	if (verbosity)
		libmp_verbosity = verbosity;
	conf = load_config(DEFAULT_CONFIGFILE);
	if (verbosity)
		libmp_verbosity = verbosity;
	setlogmask(LOG_UPTO(libmp_verbosity + 3));

	if (!conf) {
		condlog(0, "failed to load configuration");
		goto failed;
	}

	if (bindings_read_only)
		conf->bindings_read_only = bindings_read_only;
	uxsock_timeout = conf->uxsock_timeout;
	/* publish the configuration; readers access it under RCU */
	rcu_assign_pointer(multipath_conf, conf);
	if (init_checkers()) {
		condlog(0, "failed to initialize checkers");
		goto failed;
	}
	if (init_prio()) {
		condlog(0, "failed to initialize prioritizers");
		goto failed;
	}
	/* Failing this is non-fatal */

	init_foreign(conf->enable_foreign);

	if (poll_dmevents)
		poll_dmevents = dmevent_poll_supported();

	envp = getenv("LimitNOFILE");

	if (envp)
		condlog(2,"Using systemd provided open fds limit of %s", envp);
	else
		set_max_fds(conf->max_fds);

	vecs = gvecs = init_vecs();
	if (!vecs)
		goto failed;

	setscheduler();
	set_oom_adj();
#ifdef FPIN_EVENT_HANDLER
	if (conf->marginal_pathgroups == MARGINAL_PATHGROUP_FPIN)
		fpin_marginal_paths = 1;
#endif
	/*
	 * Startup done, invalidate configuration
	 */
	conf = NULL;

	pthread_cleanup_push(config_cleanup, NULL);
	pthread_mutex_lock(&config_lock);

	rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
	if (!rc) {
		/* Wait for uxlsnr startup */
		while (running_state == DAEMON_START)
			pthread_cond_wait(&config_cond, &config_lock);
		state = running_state;
	}
	pthread_cleanup_pop(1);

	if (rc) {
		condlog(0, "failed to create cli listener: %d", rc);
		goto failed;
	}
	else {
		uxlsnr_thr_started = true;
		/* uxlsnr posts DAEMON_CONFIGURE once it is ready */
		if (state != DAEMON_CONFIGURE) {
			condlog(0, "cli listener failed to start");
			goto failed;
		}
	}

	if (poll_dmevents) {
		if (init_dmevent_waiter(vecs)) {
			condlog(0, "failed to allocate dmevents waiter info");
			goto failed;
		}
		if ((rc = pthread_create(&dmevent_thr, &misc_attr,
					 wait_dmevents, NULL))) {
			condlog(0, "failed to create dmevent waiter thread: %d",
				rc);
			goto failed;
		} else
			dmevent_thr_started = true;
	}

	/*
	 * Start uevent listener early to catch events
	 */
	if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
		condlog(0, "failed to create uevent thread: %d", rc);
		goto failed;
	} else
		uevent_thr_started = true;
	pthread_attr_destroy(&uevent_attr);

	/*
	 * start threads
	 */
	if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
		condlog(0,"failed to create checker loop thread: %d", rc);
		goto failed;
	} else
		check_thr_started = true;
	if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
		condlog(0, "failed to create uevent dispatcher: %d", rc);
		goto failed;
	} else
		uevq_thr_started = true;

	if (fpin_marginal_paths) {
		if ((rc = pthread_create(&fpin_thr, &misc_attr,
			fpin_fabric_notification_receiver, NULL))) {
			condlog(0, "failed to create the fpin receiver thread: %d", rc);
			goto failed;
		} else
			fpin_thr_started = true;

		if ((rc = pthread_create(&fpin_consumer_thr,
			&misc_attr, fpin_els_li_consumer, vecs))) {
			condlog(0, "failed to create the fpin consumer thread thread: %d", rc);
			goto failed;
		} else
			fpin_consumer_thr_started = true;
	}
	pthread_attr_destroy(&misc_attr);

	/* main state-machine loop: wait for CONFIGURE or SHUTDOWN */
	while (1) {
		/* deliberately shadows the outer rc: per-iteration
		 * reconfigure() status */
		int rc = 0;

		pthread_cleanup_push(config_cleanup, NULL);
		pthread_mutex_lock(&config_lock);
		while (running_state != DAEMON_CONFIGURE &&
		       running_state != DAEMON_SHUTDOWN &&
		       /*
			* Check if another reconfigure request was scheduled
			* while we last ran reconfigure().
			* We have to test __delayed_reconfig here
			* to avoid a busy loop
			*/
		       (reconfigure_pending == FORCE_RELOAD_NONE
			 || __delayed_reconfig))
			pthread_cond_wait(&config_cond, &config_lock);

		if (running_state != DAEMON_CONFIGURE &&
		    running_state != DAEMON_SHUTDOWN)
			/* This sets running_state to DAEMON_CONFIGURE */
			__post_config_state(DAEMON_CONFIGURE);
		state = running_state;
		pthread_cleanup_pop(1);
		if (state == DAEMON_SHUTDOWN)
			break;

		/* handle DAEMON_CONFIGURE */
		pthread_cleanup_push(cleanup_lock, &vecs->lock);
		lock(&vecs->lock);
		pthread_testcancel();
		if (!need_to_delay_reconfig(vecs)) {
			enum force_reload_types reload_type;

			pthread_mutex_lock(&config_lock);
			reload_type = reconfigure_pending == FORCE_RELOAD_YES ?
				FORCE_RELOAD_YES : FORCE_RELOAD_WEAK;
			reconfigure_pending = FORCE_RELOAD_NONE;
			__delayed_reconfig = false;
			pthread_mutex_unlock(&config_lock);

			rc = reconfigure(vecs, reload_type);
		} else {
			pthread_mutex_lock(&config_lock);
			__delayed_reconfig = true;
			pthread_mutex_unlock(&config_lock);
			condlog(3, "delaying reconfigure()");
		}
		lock_cleanup_pop(vecs->lock);
		if (!rc)
			post_config_state(DAEMON_IDLE);
		else {
			condlog(0, "fatal error applying configuration - aborting");
			exit_daemon();
		}
	}

	exit_code = 0;
failed:
	condlog(2, "multipathd: shut down");
	/* All cleanup is done in the cleanup_child() exit handler */
	return sd_notify_exit(exit_code);
}
3647
/*
 * Cleanup handler for __attribute__((cleanup)) fd variables: close the
 * fd pointed to, unless it is -1 (never opened) or one of the standard
 * streams, which must stay open.
 */
static void cleanup_close(int *pfd)
{
	int fd = *pfd;

	if (fd == -1 || fd == STDIN_FILENO || fd == STDOUT_FILENO ||
	    fd == STDERR_FILENO)
		return;
	close(fd);
}
3654
/*
 * Classic double-fork daemonization: detach from the controlling
 * terminal, become a session leader, chdir to "/", and redirect the
 * standard streams to /dev/null.
 *
 * Returns 0 in the final daemon process, the first child's pid (> 0)
 * in the original parent, and -1 if the first fork fails.
 */
static int
daemonize(void)
{
	int pid;
	/* closed automatically via cleanup_close() on every return path */
	int dev_null_fd __attribute__((cleanup(cleanup_close))) = -1;

	if( (pid = fork()) < 0){
		fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
		return -1;
	}
	else if (pid != 0)
		return pid;

	setsid();

	if ( (pid = fork()) < 0)
		fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
	else if (pid != 0)
		_exit(0);

	if (chdir("/") < 0)
		fprintf(stderr, "cannot chdir to '/', continuing\n");

	dev_null_fd = open("/dev/null", O_RDWR);
	if (dev_null_fd < 0){
		/* NOTE(review): _exit(0) reports success on this failure
		 * path (and the dup2 ones below) — confirm whether a
		 * non-zero status was intended */
		fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
			strerror(errno));
		_exit(0);
	}

	if (dup2(dev_null_fd, STDIN_FILENO) < 0) {
		fprintf(stderr, "cannot dup2 /dev/null to stdin : %s\n",
			strerror(errno));
		_exit(0);
	}
	if (dup2(dev_null_fd, STDOUT_FILENO) < 0) {
		fprintf(stderr, "cannot dup2 /dev/null to stdout : %s\n",
			strerror(errno));
		_exit(0);
	}
	if (dup2(dev_null_fd, STDERR_FILENO) < 0) {
		fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
			strerror(errno));
		_exit(0);
	}
	daemon_pid = getpid();
	return 0;
}
3703
/*
 * Entry point.  Parses command-line options, then either forwards a
 * one-shot/interactive command to the running daemon (-k or trailing
 * arguments), or daemonizes (unless -d) and runs child().
 */
int
main (int argc, char *argv[])
{
	extern char *optarg;
	extern int optind;
	int arg;
	int err = 0;
	int foreground = 0;
	struct config *conf;
	char *opt_k_arg = NULL;
	bool opt_k = false;

	ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf),
				   "Manipulated through RCU");
	ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout),
		"Suppress complaints about this scalar variable");

	logsink = LOGSINK_SYSLOG;

	/* make sure we don't lock any path */
	if (chdir("/") < 0)
		fprintf(stderr, "can't chdir to root directory : %s\n",
			strerror(errno));
	umask(umask(077) | 022);

	pthread_cond_init_mono(&config_cond);

	if (atexit(dm_lib_exit))
		condlog(3, "failed to register exit handler for libdm");

	libmultipath_init();
	if (atexit(libmultipath_exit))
		condlog(3, "failed to register exit handler for libmultipath");
	libmp_udev_set_sync_support(0);

	/* NOTE(review): optstring accepts 'i' but no case handles it, so
	 * -i falls into the "Invalid argument" branch — confirm intent */
	while ((arg = getopt(argc, argv, ":dsv:k::Bniw")) != EOF ) {
		switch(arg) {
		case 'd':
			foreground = 1;
			if (logsink == LOGSINK_SYSLOG)
				logsink = LOGSINK_STDERR_WITH_TIME;
			break;
		case 'v':
			/* NOTE(review): sizeof(optarg) == sizeof(char *)
			 * always, so the first test can never be true;
			 * presumably a length check was intended — confirm */
			if (sizeof(optarg) > sizeof(char *) ||
			    !isdigit(optarg[0]))
				exit(1);

			libmp_verbosity = verbosity = atoi(optarg);
			break;
		case 's':
			logsink = LOGSINK_STDERR_WITHOUT_TIME;
			break;
		case 'k':
			/* client mode: talk to the running daemon */
			opt_k = true;
			opt_k_arg = optarg;
			break;
		case 'B':
			bindings_read_only = 1;
			break;
		case 'n':
			condlog(0, "WARNING: ignoring deprecated option -n, use 'ignore_wwids = no' instead");
			break;
		case 'w':
			poll_dmevents = 0;
			break;
		default:
			fprintf(stderr, "Invalid argument '-%c'\n",
				optopt);
			exit(1);
		}
	}
	if (opt_k || optind < argc) {
		char cmd[CMDSIZE];
		char * s = cmd;
		char * c = s;

		logsink = LOGSINK_STDERR_WITH_TIME;
		if (verbosity)
			libmp_verbosity = verbosity;
		conf = load_config(DEFAULT_CONFIGFILE);
		if (!conf)
			exit(1);
		if (verbosity)
			libmp_verbosity = verbosity;
		uxsock_timeout = conf->uxsock_timeout;
		memset(cmd, 0x0, CMDSIZE);
		if (opt_k)
			s = opt_k_arg;
		else {
			/* join the remaining argv words into a single
			 * command line, quoting words containing spaces */
			while (optind < argc) {
				if (strchr(argv[optind], ' '))
					c += snprintf(c, s + CMDSIZE - c,
						      "\"%s\" ", argv[optind]);
				else
					c += snprintf(c, s + CMDSIZE - c,
						      "%s ", argv[optind]);
				optind++;
				if (c >= s + CMDSIZE) {
					fprintf(stderr, "multipathd command too large\n");
					exit(1);
				}
			}
			c += snprintf(c, s + CMDSIZE - c, "\n");
		}
		/* "-k" with no argument: hand over to the interactive client */
		if (!s) {
			char tmo_buf[16];

			snprintf(tmo_buf, sizeof(tmo_buf), "%d",
				 uxsock_timeout + 100);
			if (execl(BINDIR "/multipathc", "multipathc",
				  tmo_buf, NULL) == -1) {
				condlog(0, "ERROR: failed to execute multipathc: %m");
				err = 1;
			}
		} else
			err = uxclnt(s, uxsock_timeout + 100);
		free_config(conf);
		return err;
	}

	if (getuid() != 0) {
		fprintf(stderr, "need to be root\n");
		exit(1);
	}

	if (foreground) {
		if (!isatty(fileno(stdout)))
			setbuf(stdout, NULL);
		err = 0;
		daemon_pid = getpid();
	} else
		err = daemonize();

	if (err < 0)
		/* error */
		exit(1);
	else if (err > 0)
		/* parent dies */
		exit(0);
	else
		/* child lives */
		return (child(NULL));
}
3847
3848 void *  mpath_pr_event_handler_fn (void * pathp )
3849 {
3850         struct multipath * mpp;
3851         unsigned int i;
3852         int ret, isFound;
3853         struct path * pp = (struct path *)pathp;
3854         struct prout_param_descriptor *param;
3855         struct prin_resp *resp;
3856
3857         rcu_register_thread();
3858         mpp = pp->mpp;
3859
3860         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
3861         if (!resp){
3862                 condlog(0,"%s Alloc failed for prin response", pp->dev);
3863                 goto out;
3864         }
3865
3866         mpp->prflag = PRFLAG_UNSET;
3867         ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
3868         if (ret != MPATH_PR_SUCCESS )
3869         {
3870                 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
3871                 goto out;
3872         }
3873
3874         condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
3875                         resp->prin_descriptor.prin_readkeys.additional_length );
3876
3877         if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
3878         {
3879                 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
3880                 goto out;
3881         }
3882         condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ",
3883                 get_be64(mpp->reservation_key));
3884
3885         isFound =0;
3886         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
3887         {
3888                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
3889                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
3890                 if (!memcmp(&mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
3891                 {
3892                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
3893                         isFound =1;
3894                         break;
3895                 }
3896         }
3897         if (!isFound)
3898         {
3899                 condlog(0, "%s: Either device not registered or ", pp->dev);
3900                 condlog(0, "host is not authorised for registration. Skip path");
3901                 goto out;
3902         }
3903
3904         param = (struct prout_param_descriptor *)calloc(1, sizeof(struct prout_param_descriptor));
3905         if (!param)
3906                 goto out;
3907
3908         param->sa_flags = mpp->sa_flags;
3909         memcpy(param->sa_key, &mpp->reservation_key, 8);
3910         param->num_transportid = 0;
3911
3912         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
3913
3914         ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
3915         if (ret != MPATH_PR_SUCCESS )
3916         {
3917                 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
3918         }
3919         mpp->prflag = PRFLAG_SET;
3920
3921         free(param);
3922 out:
3923         if (resp)
3924                 free(resp);
3925         rcu_unregister_thread();
3926         return NULL;
3927 }
3928
3929 int mpath_pr_event_handle(struct path *pp)
3930 {
3931         pthread_t thread;
3932         int rc;
3933         pthread_attr_t attr;
3934         struct multipath * mpp;
3935
3936         if (pp->bus != SYSFS_BUS_SCSI)
3937                 goto no_pr;
3938
3939         mpp = pp->mpp;
3940
3941         if (!get_be64(mpp->reservation_key))
3942                 goto no_pr;
3943
3944         pthread_attr_init(&attr);
3945         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3946
3947         rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
3948         if (rc) {
3949                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
3950                 return -1;
3951         }
3952         pthread_attr_destroy(&attr);
3953         rc = pthread_join(thread, NULL);
3954         return 0;
3955
3956 no_pr:
3957         pp->mpp->prflag = PRFLAG_UNSET;
3958         return 0;
3959 }