7a57a798070608e374d7a792804aa6a0994b36c1
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <limits.h>
16 #include <linux/oom.h>
17 #include <libudev.h>
18 #include <urcu.h>
19 #ifdef USE_SYSTEMD
20 #include <systemd/sd-daemon.h>
21 #endif
22 #include <semaphore.h>
23 #include <time.h>
24 #include <stdbool.h>
25
26 /*
27  * libmultipath
28  */
29 #include "time-util.h"
30
31 /*
32  * libcheckers
33  */
34 #include "checkers.h"
35
36 /*
37  * libmultipath
38  */
39 #include "parser.h"
40 #include "vector.h"
41 #include "config.h"
42 #include "util.h"
43 #include "hwtable.h"
44 #include "defaults.h"
45 #include "structs.h"
46 #include "blacklist.h"
47 #include "structs_vec.h"
48 #include "dmparser.h"
49 #include "devmapper.h"
50 #include "sysfs.h"
51 #include "dict.h"
52 #include "discovery.h"
53 #include "debug.h"
54 #include "propsel.h"
55 #include "uevent.h"
56 #include "switchgroup.h"
57 #include "print.h"
58 #include "configure.h"
59 #include "prio.h"
60 #include "wwids.h"
61 #include "pgpolicies.h"
62 #include "uevent.h"
63 #include "log.h"
64 #include "uxsock.h"
65 #include "alias.h"
66
67 #include "mpath_cmd.h"
68 #include "mpath_persist.h"
69
70 #include "prioritizers/alua_rtpg.h"
71
72 #include "main.h"
73 #include "pidfile.h"
74 #include "uxlsnr.h"
75 #include "uxclnt.h"
76 #include "cli.h"
77 #include "cli_handlers.h"
78 #include "lock.h"
79 #include "waiter.h"
80 #include "dmevents.h"
81 #include "io_err_stat.h"
82 #include "wwids.h"
83 #include "foreign.h"
84 #include "../third-party/valgrind/drd.h"
85 #include "init_unwinder.h"
86
87 #define FILE_NAME_SIZE 256
88 #define CMDSIZE 160
89 #define MSG_SIZE 32
90
/*
 * Log the current checker result for path @pp at verbosity @lvl.
 * Nothing is logged unless the path belongs to a map, a checker has been
 * selected for it, and @lvl is within the current libmp_verbosity.
 * Offline paths get a fixed "path offline" message; otherwise the
 * checker's own message is logged, but only if it is non-empty.
 * Multi-statement macro, hence the do { } while(0) wrapper.
 * Note: @pp and @lvl are evaluated more than once.
 */
#define LOG_MSG(lvl, pp)                                        \
do {                                                            \
	if (pp->mpp && checker_selected(&pp->checker) &&        \
	    lvl <= libmp_verbosity) {                                   \
		if (pp->offline)                                \
			condlog(lvl, "%s: %s - path offline",   \
				pp->mpp->alias, pp->dev);       \
		else  {                                         \
			const char *__m =                       \
				checker_message(&pp->checker);  \
								\
			if (strlen(__m))                              \
				condlog(lvl, "%s: %s - %s checker%s", \
					pp->mpp->alias,               \
					pp->dev,                      \
					checker_name(&pp->checker),   \
					__m);                         \
		}                                                     \
	}                                                             \
} while(0)
111
/* Argument bundle for persistent-reservation event handling threads. */
struct mpath_event_param
{
	char * devname;			/* kernel device name of the path */
	struct multipath *mpp;		/* map the event applies to */
};
117
int uxsock_timeout;		/* client timeout on the unix socket */
static int verbosity;		/* daemon log verbosity level */
static int bindings_read_only;	/* if set, never update the bindings file */
static int ignore_new_devs;	/* if set, don't manage newly seen devices */
#ifdef NO_DMEVENTS_POLL
static int poll_dmevents = 0;
#else
/* use the central dm-event polling thread instead of per-map waiters */
static int poll_dmevents = 1;
#endif
/* Don't access this variable without holding config_lock */
static volatile enum daemon_status running_state = DAEMON_INIT;
/* Don't access this variable without holding config_lock */
static bool __delayed_reconfig;
pid_t daemon_pid;
/* Protects running_state, __delayed_reconfig and reconfigure_pending */
static pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
/* Broadcast on every running_state transition */
static pthread_cond_t config_cond;
static pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, dmevent_thr;
static bool check_thr_started, uevent_thr_started, uxlsnr_thr_started,
	uevq_thr_started, dmevent_thr_started;
static int pid_fd = -1;		/* fd of the locked pid file, -1 if none */
138
139 static inline enum daemon_status get_running_state(void)
140 {
141         enum daemon_status st;
142
143         pthread_mutex_lock(&config_lock);
144         st = running_state;
145         pthread_mutex_unlock(&config_lock);
146         return st;
147 }
148
149 int should_exit(void)
150 {
151         return get_running_state() == DAEMON_SHUTDOWN;
152 }
153
154 static bool get_delayed_reconfig(void)
155 {
156         bool val;
157
158         pthread_mutex_lock(&config_lock);
159         val = __delayed_reconfig;
160         pthread_mutex_unlock(&config_lock);
161         return val;
162 }
163
/*
 * global copy of vecs for use in sig handlers
 */
static struct vectors * gvecs;

/* Active configuration; readers use RCU (see get_multipath_config()) */
struct config *multipath_conf;

/* Local variables */
/* Flags set from signal handlers; presumably polled elsewhere — confirm */
static volatile sig_atomic_t exit_sig;
static volatile sig_atomic_t reconfig_sig;
static volatile sig_atomic_t log_reset_sig;

/* Human-readable name for each daemon_status value */
static const char *daemon_status_msg[DAEMON_STATUS_SIZE] = {
	[DAEMON_INIT] = "init",
	[DAEMON_START] = "startup",
	[DAEMON_CONFIGURE] = "configure",
	[DAEMON_IDLE] = "idle",
	[DAEMON_RUNNING] = "running",
	[DAEMON_SHUTDOWN] = "shutdown",
};
184
185 const char *
186 daemon_status(void)
187 {
188         int status = get_running_state();
189
190         if (status < DAEMON_INIT || status >= DAEMON_STATUS_SIZE)
191                 return NULL;
192
193         return daemon_status_msg[status];
194 }
195
/*
 * I love you too, systemd ...
 */
#ifdef USE_SYSTEMD
/*
 * Tell systemd about a daemon state transition:
 * - STATUS= is updated on every reported transition;
 * - READY=1 is sent when the first configuration finishes
 *   (and again after each reload, to end RELOADING);
 * - RELOADING=1 is sent when a reconfiguration starts after startup;
 * - STOPPING=1 is sent on shutdown.
 */
static void do_sd_notify(enum daemon_status old_state,
			 enum daemon_status new_state)
{
	char notify_msg[MSG_SIZE];
	const char *msg;
	/* true once the initial CONFIGURE->IDLE transition has happened */
	static bool startup_done = false;

	/*
	 * Checkerloop switches back and forth between idle and running state.
	 * No need to tell systemd each time.
	 * These notifications cause a lot of overhead on dbus.
	 */
	if ((new_state == DAEMON_IDLE || new_state == DAEMON_RUNNING) &&
	    (old_state == DAEMON_IDLE || old_state == DAEMON_RUNNING))
		return;

	if (new_state == DAEMON_IDLE || new_state == DAEMON_RUNNING)
		msg = "up";
	else
		msg = daemon_status_msg[new_state];

	if (msg && !safe_sprintf(notify_msg, "STATUS=%s", msg))
		sd_notify(0, notify_msg);

	if (new_state == DAEMON_SHUTDOWN) {
		/* Tell systemd that we're not RELOADING any more */
		if (old_state == DAEMON_CONFIGURE && startup_done)
			sd_notify(0, "READY=1");
		sd_notify(0, "STOPPING=1");
	} else if (new_state == DAEMON_IDLE && old_state == DAEMON_CONFIGURE) {
		sd_notify(0, "READY=1");
		startup_done = true;
	} else if (new_state == DAEMON_CONFIGURE && startup_done)
		sd_notify(0, "RELOADING=1");
}
#else
/* No-op stub when built without systemd support */
static void do_sd_notify(__attribute__((unused)) enum daemon_status old_state,
			 __attribute__((unused)) enum daemon_status new_state)
{}
#endif
240
/* pthread cancellation/cleanup handler: drop config_lock on unwind */
static void config_cleanup(__attribute__((unused)) void *arg)
{
	pthread_mutex_unlock(&config_lock);
}
245
/*
 * Wait on config_cond for up to @ms milliseconds while @condition holds.
 * Must be called with config_lock held.  Evaluates to the last
 * pthread_cond_timedwait() result: 0 if the condition became false,
 * ETIMEDOUT on timeout, or another error code.  Implemented as a GNU
 * statement expression so it can yield rc; @condition is evaluated
 * repeatedly.
 */
#define __wait_for_state_change(condition, ms)                          \
	({                                                              \
		struct timespec tmo;                                    \
		int rc = 0;                                             \
									\
		if (condition) {                                        \
			get_monotonic_time(&tmo);                       \
			tmo.tv_nsec += (ms) * 1000 * 1000;              \
			normalize_timespec(&tmo);                       \
			do                                              \
				rc = pthread_cond_timedwait(            \
					&config_cond, &config_lock, &tmo); \
			while (rc == 0 && (condition));                 \
		}                                                       \
		rc;                                                     \
	})
262
/*
 * If the current status is @oldstate, wait for at most @ms milliseconds
 * for the state to change, and return the new state, which may still be
 * @oldstate.
 *
 * SHUTDOWN is returned immediately: that state is final, so there is
 * nothing to wait for.  The cleanup handler guarantees config_lock is
 * released even if the thread is cancelled inside the wait.
 */
enum daemon_status wait_for_state_change_if(enum daemon_status oldstate,
					    unsigned long ms)
{
	enum daemon_status st;

	if (oldstate == DAEMON_SHUTDOWN)
		return DAEMON_SHUTDOWN;

	pthread_mutex_lock(&config_lock);
	pthread_cleanup_push(config_cleanup, NULL);
	__wait_for_state_change(running_state == oldstate, ms);
	st = running_state;
	pthread_cleanup_pop(1);
	return st;
}
283
/* Don't access this variable without holding config_lock */
static enum force_reload_types reconfigure_pending = FORCE_RELOAD_NONE;
/* Only set while changing to DAEMON_CONFIGURE, and only access while
 * reconfiguring or scheduling a delayed reconfig in DAEMON_CONFIGURE */
static volatile enum force_reload_types reload_type = FORCE_RELOAD_NONE;
289
/*
 * Postpone the reconfiguration that is about to run: remember its
 * reload type in reconfigure_pending and raise __delayed_reconfig, so
 * it is replayed later (see ev_add_map()/get_delayed_reconfig()).
 */
static void enable_delayed_reconfig(void)
{
	pthread_mutex_lock(&config_lock);
	reconfigure_pending = reload_type;
	__delayed_reconfig = true;
	pthread_mutex_unlock(&config_lock);
}
297
/*
 * Transition the daemon to @state: wake all waiters on config_cond and
 * notify systemd.  No-op if @state is the current state or the daemon
 * is already shutting down.  A transition to IDLE is turned into
 * CONFIGURE when a reconfigure request is pending, so the reload runs
 * next; reload_type is latched for the upcoming CONFIGURE cycle.
 * must be called with config_lock held
 */
static void __post_config_state(enum daemon_status state)
{
	if (state != running_state && running_state != DAEMON_SHUTDOWN) {
		enum daemon_status old_state = running_state;

		/*
		 * Handle a pending reconfigure request.
		 * DAEMON_IDLE is set from child() after reconfigure(),
		 * or from checkerloop() after completing checkers.
		 * In either case, child() will see DAEMON_CONFIGURE
		 * again and start another reconfigure cycle.
		 */
		if (reconfigure_pending != FORCE_RELOAD_NONE &&
		    state == DAEMON_IDLE &&
		    (old_state == DAEMON_CONFIGURE ||
		     old_state == DAEMON_RUNNING)) {
			/*
			 * notify systemd of transient idle state, lest systemd
			 * thinks the reload lasts forever.
			 */
			do_sd_notify(old_state, DAEMON_IDLE);
			old_state = DAEMON_IDLE;
			state = DAEMON_CONFIGURE;
		}
		if (state == DAEMON_CONFIGURE) {
			reload_type = (reconfigure_pending == FORCE_RELOAD_YES) ? FORCE_RELOAD_YES : FORCE_RELOAD_WEAK;
			reconfigure_pending = FORCE_RELOAD_NONE;
			__delayed_reconfig = false;
		}
		running_state = state;
		pthread_cond_broadcast(&config_cond);
		do_sd_notify(old_state, state);
	}
}
333
/* Locked wrapper around __post_config_state(); safe against cancellation */
void post_config_state(enum daemon_status state)
{
	pthread_mutex_lock(&config_lock);
	pthread_cleanup_push(config_cleanup, NULL);
	__post_config_state(state);
	pthread_cleanup_pop(1);
}
341
/*
 * Request a reconfiguration of type @requested_type.  A pending
 * FORCE_RELOAD_YES is never downgraded.  If the daemon is IDLE, the
 * CONFIGURE transition is posted right away; if it is busy (CONFIGURE
 * or RUNNING), the request is only recorded and picked up on the next
 * transition to IDLE (see __post_config_state()).  Ignored on shutdown.
 */
void schedule_reconfigure(enum force_reload_types requested_type)
{
	pthread_mutex_lock(&config_lock);
	pthread_cleanup_push(config_cleanup, NULL);
	/* declared here because pthread_cleanup_push() opens a block */
	enum force_reload_types type;

	type = (reconfigure_pending == FORCE_RELOAD_YES ||
		requested_type == FORCE_RELOAD_YES) ?
	       FORCE_RELOAD_YES : FORCE_RELOAD_WEAK;
	switch (running_state)
	{
	case DAEMON_SHUTDOWN:
		break;
	case DAEMON_IDLE:
		reconfigure_pending = type;
		__post_config_state(DAEMON_CONFIGURE);
		break;
	case DAEMON_CONFIGURE:
	case DAEMON_RUNNING:
		reconfigure_pending = type;
		break;
	default:
		break;
	}
	pthread_cleanup_pop(1);
}
368
/*
 * Wait until the daemon reaches @state, IDLE, or SHUTDOWN; if it is
 * IDLE (and @state isn't), post the transition to @state.  Returns the
 * resulting state, which the caller must check (it may be SHUTDOWN).
 * Note: the cleanup handler is pushed before the mutex is taken;
 * pthread_mutex_lock() is not a cancellation point, so the handler can
 * only run with the lock held.
 */
static enum daemon_status set_config_state(enum daemon_status state)
{
	int rc = 0;
	enum daemon_status st;

	pthread_cleanup_push(config_cleanup, NULL);
	pthread_mutex_lock(&config_lock);

	while (rc == 0 &&
	       running_state != state &&
	       running_state != DAEMON_SHUTDOWN &&
	       running_state != DAEMON_IDLE) {
		rc = pthread_cond_wait(&config_cond, &config_lock);
	}

	if (rc == 0 && running_state == DAEMON_IDLE && state != DAEMON_IDLE)
		__post_config_state(state);
	st = running_state;

	pthread_cleanup_pop(1);
	return st;
}
391
392 struct config *get_multipath_config(void)
393 {
394         rcu_read_lock();
395         return rcu_dereference(multipath_conf);
396 }
397
/* Counterpart to get_multipath_config(): drop the RCU read lock.
 * The void * signature lets it double as a pthread cleanup handler. */
void put_multipath_config(__attribute__((unused)) void *arg)
{
	rcu_read_unlock();
}
402
/*
 * Return 1 if @mpp's best path group differs from the group the kernel
 * would use next (mpp->nextpg), i.e. a pathgroup switch is warranted;
 * 0 otherwise.  With @refresh set, path priorities are re-read first.
 * Note: select_path_group() is invoked before the manual-failback
 * check, presumably for its side effects on group state — confirm.
 * With manual failback, mpp->bestpg is deliberately left unchanged.
 */
static int
need_switch_pathgroup (struct multipath * mpp, int refresh)
{
	struct pathgroup * pgp;
	struct path * pp;
	unsigned int i, j;
	struct config *conf;
	int bestpg;

	if (!mpp)
		return 0;

	/*
	 * Refresh path priority values
	 */
	if (refresh) {
		vector_foreach_slot (mpp->pg, pgp, i) {
			vector_foreach_slot (pgp->paths, pp, j) {
				conf = get_multipath_config();
				pthread_cleanup_push(put_multipath_config,
						     conf);
				pathinfo(pp, conf, DI_PRIO);
				pthread_cleanup_pop(1);
			}
		}
	}

	if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
		return 0;

	bestpg = select_path_group(mpp);
	if (mpp->pgfailback == -FAILBACK_MANUAL)
		return 0;

	mpp->bestpg = bestpg;
	if (mpp->bestpg != mpp->nextpg)
		return 1;

	return 0;
}
443
/* Ask the kernel to switch @mpp to its best path group and count it. */
static void
switch_pathgroup (struct multipath * mpp)
{
	mpp->stat_switchgroup++;
	dm_switchgroup(mpp->alias, mpp->bestpg);
	condlog(2, "%s: switch to path group #%i",
		 mpp->alias, mpp->bestpg);
}
452
453 static int
454 wait_for_events(struct multipath *mpp, struct vectors *vecs)
455 {
456         if (poll_dmevents)
457                 return watch_dmevents(mpp->alias);
458         else
459                 return start_waiter_thread(mpp, vecs);
460 }
461
/*
 * Drop @mpp from multipathd's internal tables, stopping its waiter
 * thread first when per-map waiters are in use.
 */
static void
remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs)
{
	/* devices are automatically removed by the dmevent polling code,
	 * so they don't need to be manually removed here */
	condlog(3, "%s: removing map from internal tables", mpp->alias);
	if (!poll_dmevents)
		stop_waiter_thread(mpp);
	remove_map(mpp, vecs->pathvec, vecs->mpvec);
}
472
473 static void
474 remove_maps_and_stop_waiters(struct vectors *vecs)
475 {
476         int i;
477         struct multipath * mpp;
478
479         if (!vecs)
480                 return;
481
482         if (!poll_dmevents) {
483                 vector_foreach_slot(vecs->mpvec, mpp, i)
484                         stop_waiter_thread(mpp);
485         }
486         else
487                 unwatch_all_dmevents();
488
489         remove_maps(vecs);
490 }
491
/*
 * Refresh @mpp's device-mapper info and table/status strings.  With
 * @reset, also re-evaluate no_path_retry and cancel a deferred remove
 * if paths are present.  Returns 0 on success; on failure the map is
 * removed from the internal tables (mpp is freed!) and 1 is returned.
 */
int __setup_multipath(struct vectors *vecs, struct multipath *mpp,
		      int reset)
{
	if (dm_get_info(mpp->alias, &mpp->dmi)) {
		/* Error accessing table */
		condlog(2, "%s: cannot access table", mpp->alias);
		goto out;
	}

	if (update_multipath_strings(mpp, vecs->pathvec) != DMP_OK) {
		condlog(0, "%s: failed to setup multipath", mpp->alias);
		goto out;
	}

	if (reset) {
		set_no_path_retry(mpp);
		/* paths came back: a pending deferred remove is now stale */
		if (VECTOR_SIZE(mpp->paths) != 0)
			dm_cancel_deferred_remove(mpp);
	}

	return 0;
out:
	remove_map_and_stop_waiter(mpp, vecs);
	return 1;
}
517
/*
 * Resync multipathd's view of map @mapname with the kernel: refresh its
 * strings, then mark as failed any path the kernel already considers
 * failed (dmstate) but the checker still reports as up.
 * Returns 0 on success, 1 if the map setup failed (map was freed),
 * 2 if no map with that alias is registered.
 */
int update_multipath (struct vectors *vecs, char *mapname, int reset)
{
	struct multipath *mpp;
	struct pathgroup  *pgp;
	struct path *pp;
	int i, j;

	mpp = find_mp_by_alias(vecs->mpvec, mapname);

	if (!mpp) {
		condlog(3, "%s: multipath map not found", mapname);
		return 2;
	}

	if (__setup_multipath(vecs, mpp, reset))
		return 1; /* mpp freed in setup_multipath */

	/*
	 * compare checkers states with DM states
	 */
	vector_foreach_slot (mpp->pg, pgp, i) {
		vector_foreach_slot (pgp->paths, pp, j) {
			if (pp->dmstate != PSTATE_FAILED)
				continue;

			if (pp->state != PATH_DOWN) {
				struct config *conf;
				int oldstate = pp->state;
				unsigned int checkint;

				conf = get_multipath_config();
				checkint = conf->checkint;
				put_multipath_config(conf);
				condlog(2, "%s: mark as failed", pp->dev);
				mpp->stat_path_failures++;
				pp->state = PATH_DOWN;
				/* path just left the usable set: update
				 * queueing mode accounting */
				if (oldstate == PATH_UP ||
				    oldstate == PATH_GHOST)
					update_queue_mode_del_path(mpp);

				/*
				 * if opportune,
				 * schedule the next check earlier
				 */
				if (pp->tick > checkint)
					pp->tick = checkint;
			}
		}
	}
	return 0;
}
569
570 static int
571 update_map (struct multipath *mpp, struct vectors *vecs, int new_map)
572 {
573         int retries = 3;
574         char *params __attribute__((cleanup(cleanup_charp))) = NULL;
575         struct path *pp;
576         int i;
577
578 retry:
579         condlog(4, "%s: updating new map", mpp->alias);
580         if (adopt_paths(vecs->pathvec, mpp)) {
581                 condlog(0, "%s: failed to adopt paths for new map update",
582                         mpp->alias);
583                 retries = -1;
584                 goto fail;
585         }
586         verify_paths(mpp);
587         mpp->action = ACT_RELOAD;
588
589         if (mpp->prflag) {
590                 vector_foreach_slot(mpp->paths, pp, i) {
591                         if ((pp->state == PATH_UP)  || (pp->state == PATH_GHOST)) {
592                                 /* persistent reseravtion check*/
593                                 mpath_pr_event_handle(pp);
594                         }
595                 }
596         }
597
598         if (setup_map(mpp, &params, vecs)) {
599                 condlog(0, "%s: failed to setup new map in update", mpp->alias);
600                 retries = -1;
601                 goto fail;
602         }
603         if (domap(mpp, params, 1) == DOMAP_FAIL && retries-- > 0) {
604                 condlog(0, "%s: map_udate sleep", mpp->alias);
605                 free(params);
606                 params = NULL;
607                 sleep(1);
608                 goto retry;
609         }
610
611 fail:
612         if (new_map && (retries < 0 || wait_for_events(mpp, vecs))) {
613                 condlog(0, "%s: failed to create new map", mpp->alias);
614                 remove_map(mpp, vecs->pathvec, vecs->mpvec);
615                 return 1;
616         }
617
618         if (setup_multipath(vecs, mpp))
619                 return 1;
620
621         sync_map_state(mpp);
622
623         if (retries < 0)
624                 condlog(0, "%s: failed reload in new map update", mpp->alias);
625         return 0;
626 }
627
/*
 * Register an existing device-mapper map (identified by @alias) with
 * multipathd before any of its paths were seen: read its dm info,
 * WWID and table from the kernel, insert it into mpvec and run
 * update_map() on it.  Returns the new mpp, or NULL on failure (the
 * map, if partly constructed, is removed from the internal tables).
 */
static struct multipath *
add_map_without_path (struct vectors *vecs, const char *alias)
{
	struct multipath * mpp = alloc_multipath();
	struct config *conf;

	if (!mpp)
		return NULL;
	if (!alias) {
		free(mpp);
		return NULL;
	}

	/* NOTE(review): strdup() result is unchecked; on allocation
	 * failure mpp->alias is NULL here — confirm callees cope. */
	mpp->alias = strdup(alias);

	if (dm_get_info(mpp->alias, &mpp->dmi)) {
		condlog(3, "%s: cannot access table", mpp->alias);
		goto out;
	}
	if (!strlen(mpp->wwid))
		dm_get_uuid(mpp->alias, mpp->wwid, WWID_SIZE);
	if (!strlen(mpp->wwid))
		condlog(1, "%s: adding map with empty WWID", mpp->alias);
	conf = get_multipath_config();
	mpp->mpe = find_mpe(conf->mptable, mpp->wwid);
	put_multipath_config(conf);

	if (update_multipath_table(mpp, vecs->pathvec, 0) != DMP_OK)
		goto out;

	if (!vector_alloc_slot(vecs->mpvec))
		goto out;

	vector_set_slot(vecs->mpvec, mpp);

	if (update_map(mpp, vecs, 1) != 0) /* map removed */
		return NULL;

	return mpp;
out:
	remove_map(mpp, vecs->pathvec, vecs->mpvec);
	return NULL;
}
671
/*
 * Reconcile the old map vector (vecs->mpvec) with the newly built map
 * vector @nmpv: flush every old map that no longer appears in @nmpv;
 * maps that cannot be flushed (e.g. still open) are moved over to
 * @nmpv instead.  With reassign_maps set, surviving maps get their
 * dm dependencies reassigned.  Returns 1 on allocation failure,
 * 0 otherwise.
 */
static int
coalesce_maps(struct vectors *vecs, vector nmpv)
{
	struct multipath * ompp;
	vector ompv = vecs->mpvec;
	unsigned int i, reassign_maps;
	struct config *conf;

	conf = get_multipath_config();
	reassign_maps = conf->reassign_maps;
	put_multipath_config(conf);
	vector_foreach_slot (ompv, ompp, i) {
		condlog(3, "%s: coalesce map", ompp->alias);
		if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
			/*
			 * remove all current maps not allowed by the
			 * current configuration
			 */
			if (dm_flush_map(ompp->alias)) {
				condlog(0, "%s: unable to flush devmap",
					ompp->alias);
				/*
				 * may be just because the device is open
				 */
				/* setup_multipath() removed ompp from ompv
				 * on failure, so step back one slot */
				if (setup_multipath(vecs, ompp) != 0) {
					i--;
					continue;
				}
				if (!vector_alloc_slot(nmpv))
					return 1;

				vector_set_slot(nmpv, ompp);

				/* moved to nmpv; compensate for the
				 * deletion in the foreach */
				vector_del_slot(ompv, i);
				i--;
			}
			else
				condlog(2, "%s devmap removed", ompp->alias);
		} else if (reassign_maps) {
			condlog(3, "%s: Reassign existing device-mapper"
				" devices", ompp->alias);
			dm_reassign(ompp->alias);
		}
	}
	return 0;
}
718
/* Run sync_map_state() on every map in @mpvec. */
static void
sync_maps_state(vector mpvec)
{
	unsigned int i;
	struct multipath *mpp;

	vector_foreach_slot (mpvec, mpp, i)
		sync_map_state(mpp);
}
728
/*
 * Remove map @mpp from device-mapper (deferred if @nopaths allows it)
 * and, on success, drop it from multipathd's internal tables.
 * Returns 0 on success; otherwise the dm_flush_map* result, with the
 * map kept in the tables (and deferred_remove marked in-progress when
 * the removal was deferred rather than refused).
 */
int
flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
{
	int r;

	if (nopaths)
		r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
	else
		r = dm_flush_map(mpp->alias);
	/*
	 * The flush above may generate a spurious uevent; clearing our
	 * references below lets us ignore it.
	 */
	if (r) {
		/*
		 * May not really be an error -- if the map was already flushed
		 * from the device mapper by dmsetup(8) for instance.
		 */
		if (r == 1)
			condlog(0, "%s: can't flush", mpp->alias);
		else {
			condlog(2, "%s: devmap deferred remove", mpp->alias);
			mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
		}
		return r;
	}
	else
		condlog(2, "%s: map flushed", mpp->alias);

	remove_map_and_stop_waiter(mpp, vecs);

	return 0;
}
762
/*
 * Uevent handler for dm map addition: resolve the map name from
 * DM_NAME, or fall back to a major:minor lookup, then run ev_add_map()
 * under the vecs lock.  Returns ev_add_map()'s result, or 1 if no
 * name could be resolved.
 */
static int
uev_add_map (struct uevent * uev, struct vectors * vecs)
{
	char *alias;
	int major = -1, minor = -1, rc;

	condlog(3, "%s: add map (uevent)", uev->kernel);
	alias = uevent_get_dm_name(uev);
	if (!alias) {
		condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
		major = uevent_get_major(uev);
		minor = uevent_get_minor(uev);
		alias = dm_mapname(major, minor);
		if (!alias) {
			condlog(2, "%s: mapname not found for %d:%d",
				uev->kernel, major, minor);
			return 1;
		}
	}
	/* cleanup handler keeps the lock safe across cancellation */
	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();
	rc = ev_add_map(uev->kernel, alias, vecs);
	lock_cleanup_pop(vecs->lock);
	free(alias);
	return rc;
}
790
/*
 * ev_add_map expects that the multipath device already exists in kernel
 * before it is called. It just adds a device to multipathd or updates an
 * existing device.
 *
 * Returns 0 on success (including "nothing to do"), 1 on failure.
 * Must be called with the vecs lock held (see uev_add_map()).
 */
int
ev_add_map (char * dev, const char * alias, struct vectors * vecs)
{
	struct multipath * mpp;
	int reassign_maps;
	struct config *conf;

	if (dm_is_mpath(alias) != 1) {
		condlog(4, "%s: not a multipath map", alias);
		return 0;
	}

	mpp = find_mp_by_alias(vecs->mpvec, alias);

	if (mpp) {
		/* Known map: run any actions deferred while waiting
		 * for udev, and possibly trigger a delayed reconfigure. */
		if (mpp->wait_for_udev > 1) {
			condlog(2, "%s: performing delayed actions",
				mpp->alias);
			if (update_map(mpp, vecs, 0))
				/* setup multipathd removed the map */
				return 1;
		}
		conf = get_multipath_config();
		reassign_maps = conf->reassign_maps;
		put_multipath_config(conf);
		if (mpp->wait_for_udev) {
			mpp->wait_for_udev = 0;
			if (get_delayed_reconfig() &&
			    !need_to_delay_reconfig(vecs)) {
				condlog(2, "reconfigure (delayed)");
				schedule_reconfigure(FORCE_RELOAD_WEAK);
				return 0;
			}
		}
		/*
		 * Not really an error -- we generate our own uevent
		 * if we create a multipath mapped device as a result
		 * of uev_add_path
		 */
		if (reassign_maps) {
			condlog(3, "%s: Reassign existing device-mapper devices",
				alias);
			dm_reassign(alias);
		}
		return 0;
	}
	condlog(2, "%s: adding map", alias);

	/*
	 * now we can register the map
	 */
	if ((mpp = add_map_without_path(vecs, alias))) {
		sync_map_state(mpp);
		condlog(2, "%s: devmap %s registered", alias, dev);
		return 0;
	} else {
		condlog(2, "%s: ev_add_map failed", dev);
		return 1;
	}
}
856
/*
 * Uevent handler for dm map removal: look the map up by minor number,
 * verify the alias still matches (the minor may have been reused), and
 * drop the map from the internal tables.  Always returns 0 — a map we
 * don't know about simply isn't ours to remove.
 */
static int
uev_remove_map (struct uevent * uev, struct vectors * vecs)
{
	char *alias;
	int minor;
	struct multipath *mpp;

	condlog(3, "%s: remove map (uevent)", uev->kernel);
	alias = uevent_get_dm_name(uev);
	if (!alias) {
		condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
		return 0;
	}
	minor = uevent_get_minor(uev);

	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();
	mpp = find_mp_by_minor(vecs->mpvec, minor);

	if (!mpp) {
		condlog(2, "%s: devmap not registered, can't remove",
			uev->kernel);
		goto out;
	}
	if (strcmp(mpp->alias, alias)) {
		condlog(2, "%s: map alias mismatch: have \"%s\", got \"%s\")",
			uev->kernel, mpp->alias, alias);
		goto out;
	}

	/* stop queueing before removal so outstanding I/O fails fast */
	dm_queue_if_no_path(alias, 0);
	remove_map_and_stop_waiter(mpp, vecs);
out:
	lock_cleanup_pop(vecs->lock);
	free(alias);
	return 0;
}
895
896 /* Called from CLI handler */
897 int
898 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
899 {
900         struct multipath * mpp;
901
902         mpp = find_mp_by_minor(vecs->mpvec, minor);
903
904         if (!mpp) {
905                 condlog(2, "%s: devmap not registered, can't remove",
906                         devname);
907                 return 1;
908         }
909         if (strcmp(mpp->alias, alias)) {
910                 condlog(2, "%s: minor number mismatch (map %d, event %d)",
911                         mpp->alias, mpp->dmi->minor, minor);
912                 return 1;
913         }
914         return flush_map(mpp, vecs, 0);
915 }
916
/*
 * Trigger a SCSI rescan of the scsi_device ancestor of the given udev
 * device, if one exists; a no-op for non-SCSI devices.
 */
static void
rescan_path(struct udev_device *ud)
{
	struct udev_device *scsi_dev;

	scsi_dev = udev_device_get_parent_with_subsystem_devtype(ud, "scsi",
								 "scsi_device");
	if (!scsi_dev)
		return;
	sysfs_attr_set_value(scsi_dev, "rescan", "1", strlen("1"));
}
925
926 void
927 handle_path_wwid_change(struct path *pp, struct vectors *vecs)
928 {
929         struct udev_device *udd;
930
931         if (!pp || !pp->udev)
932                 return;
933
934         udd = udev_device_ref(pp->udev);
935         if (!(ev_remove_path(pp, vecs, 1) & REMOVE_PATH_SUCCESS) && pp->mpp) {
936                 pp->dmstate = PSTATE_FAILED;
937                 dm_fail_path(pp->mpp->alias, pp->dev_t);
938         }
939         rescan_path(udd);
940         sysfs_attr_set_value(udd, "uevent", "add", strlen("add"));
941         udev_device_unref(udd);
942 }
943
944 bool
945 check_path_wwid_change(struct path *pp)
946 {
947         char wwid[WWID_SIZE];
948         int len = 0;
949         size_t i;
950
951         if (!strlen(pp->wwid))
952                 return false;
953
954         /* Get the real fresh device wwid by sgio. sysfs still has old
955          * data, so only get_vpd_sgio will work to get the new wwid */
956         len = get_vpd_sgio(pp->fd, 0x83, 0, wwid, WWID_SIZE);
957
958         if (len <= 0) {
959                 condlog(2, "%s: failed to check wwid by sgio: len = %d",
960                         pp->dev, len);
961                 return false;
962         }
963
964         /*Strip any trailing blanks */
965         for (i = strlen(pp->wwid); i > 0 && pp->wwid[i-1] == ' '; i--);
966                 /* no-op */
967         pp->wwid[i] = '\0';
968         condlog(4, "%s: Got wwid %s by sgio", pp->dev, wwid);
969
970         if (strncmp(wwid, pp->wwid, WWID_SIZE)) {
971                 condlog(0, "%s: wwid '%s' doesn't match wwid '%s' from device",
972                         pp->dev, pp->wwid, wwid);
973                 return true;
974         }
975
976         return false;
977 }
978
979 /*
980  * uev_add_path can call uev_update_path, and uev_update_path can call
981  * uev_add_path
982  */
983 static int uev_update_path (struct uevent *uev, struct vectors * vecs);
984
/*
 * Handle an "add" uevent for a path device.
 *
 * Takes vecs->lock. If the path is already present in pathvec, it is
 * either re-initialized (INIT_REMOVED / empty-wwid cases), deferred
 * (INIT_PARTIAL, re-dispatched as a change event below), or the event
 * is treated as spurious. Otherwise a new struct path is allocated,
 * stored in pathvec, and handed to ev_add_path().
 *
 * Returns 0 on success (including ignored events), 1 on error.
 */
static int
uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
{
	struct path *pp;
	int ret = 0, i;
	struct config *conf;
	bool partial_init = false;

	condlog(3, "%s: add path (uevent)", uev->kernel);
	if (strstr(uev->kernel, "..") != NULL) {
		/*
		 * Don't allow relative device names in the pathvec
		 */
		condlog(0, "%s: path name is invalid", uev->kernel);
		return 1;
	}

	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();
	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
	if (pp) {
		int r;
		struct multipath *prev_mpp = NULL;

		if (pp->initialized == INIT_PARTIAL) {
			/* handled below, after dropping the lock */
			partial_init = true;
			goto out;
		} else if (pp->initialized == INIT_REMOVED) {
			/*
			 * Remember the map the path used to belong to, so we
			 * can check below whether its wwid still matches.
			 */
			condlog(3, "%s: re-adding removed path", pp->dev);
			pp->initialized = INIT_NEW;
			prev_mpp = pp->mpp;
			if (prev_mpp == NULL)
				condlog(0, "Bug: %s was in INIT_REMOVED state without being a multipath member",
					pp->dev);
			pp->mpp = NULL;
			/* make sure get_uid() is called */
			pp->wwid[0] = '\0';
		} else
			condlog(3,
				"%s: spurious uevent, path already in pathvec",
				uev->kernel);

		if (!pp->mpp && !strlen(pp->wwid)) {
			condlog(3, "%s: reinitialize path", uev->kernel);
			/* switch to the udev device from the new event */
			udev_device_unref(pp->udev);
			pp->udev = udev_device_ref(uev->udev);
			conf = get_multipath_config();
			pthread_cleanup_push(put_multipath_config, conf);
			r = pathinfo(pp, conf,
				     DI_ALL | DI_BLACKLIST);
			pthread_cleanup_pop(1);
			if (r == PATHINFO_OK && !prev_mpp)
				ret = ev_add_path(pp, vecs, need_do_map);
			else if (r == PATHINFO_OK &&
				 !strncmp(pp->wwid, prev_mpp->wwid, WWID_SIZE)) {
				/*
				 * Path was unsuccessfully removed, but now
				 * re-added, and still belongs to the right map
				 * - all fine, reinstate asap
				 */
				pp->mpp = prev_mpp;
				pp->tick = 1;
				ret = 0;
			} else if (prev_mpp) {
				/*
				 * Bad: re-added path still hangs in wrong map
				 * Make another attempt to remove the path
				 */
				pp->mpp = prev_mpp;
				if (!(ev_remove_path(pp, vecs, true) &
				      REMOVE_PATH_SUCCESS)) {
					/*
					 * Failure in ev_remove_path will keep
					 * path in pathvec in INIT_REMOVED state
					 * Fail the path to make sure it isn't
					 * used any more.
					 */
					pp->dmstate = PSTATE_FAILED;
					dm_fail_path(pp->mpp->alias, pp->dev_t);
					condlog(1, "%s: failed to re-add path still mapped in %s",
						pp->dev, pp->mpp->alias);
					ret = 1;
				} else if (r == PATHINFO_OK)
					/*
					 * Path successfully freed, move on to
					 * "new path" code path below
					 */
					pp = NULL;
			} else if (r == PATHINFO_SKIPPED) {
				condlog(3, "%s: remove blacklisted path",
					uev->kernel);
				i = find_slot(vecs->pathvec, (void *)pp);
				if (i != -1)
					vector_del_slot(vecs->pathvec, i);
				free_path(pp);
			} else {
				condlog(0, "%s: failed to reinitialize path",
					uev->kernel);
				ret = 1;
			}
		}
	}
	/* pp != NULL here means the event was fully handled above */
	if (pp)
		goto out;

	/*
	 * get path vital state
	 */
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	ret = alloc_path_with_pathinfo(conf, uev->udev,
				       uev->wwid, DI_ALL, &pp);
	pthread_cleanup_pop(1);
	if (!pp) {
		if (ret == PATHINFO_SKIPPED)
			ret = 0;
		else {
			condlog(3, "%s: failed to get path info", uev->kernel);
			ret = 1;
		}
		goto out;
	}
	ret = store_path(vecs->pathvec, pp);
	if (!ret) {
		conf = get_multipath_config();
		pp->checkint = conf->checkint;
		put_multipath_config(conf);
		ret = ev_add_path(pp, vecs, need_do_map);
	} else {
		condlog(0, "%s: failed to store path info, "
			"dropping event",
			uev->kernel);
		free_path(pp);
		ret = 1;
	}
out:
	lock_cleanup_pop(vecs->lock);
	if (partial_init)
		/* INIT_PARTIAL paths: treat the add event as a change event */
		return uev_update_path(uev, vecs);
	return ret;
}
1127
1128 /*
1129  * returns:
1130  * 0: added
1131  * 1: error
1132  */
int
ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map)
{
	struct multipath * mpp;
	char *params __attribute((cleanup(cleanup_charp))) = NULL;
	int retries = 3;
	int start_waiter = 0;
	int ret;

	/*
	 * need path UID to go any further
	 */
	if (strlen(pp->wwid) == 0) {
		condlog(0, "%s: failed to get path uid", pp->dev);
		goto fail; /* leave path added to pathvec */
	}
	/* does a map with this wwid already exist? */
	mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
	if (mpp && pp->size && mpp->size != pp->size) {
		/* size mismatch: refuse the path and drop it from pathvec */
		condlog(0, "%s: failed to add new path %s, device size mismatch", mpp->alias, pp->dev);
		int i = find_slot(vecs->pathvec, (void *)pp);
		if (i != -1)
			vector_del_slot(vecs->pathvec, i);
		free_path(pp);
		return 1;
	}
	if (mpp && mpp->wait_for_udev &&
	    (pathcount(mpp, PATH_UP) > 0 ||
	     (pathcount(mpp, PATH_GHOST) > 0 &&
	      path_get_tpgs(pp) != TPGS_IMPLICIT &&
	      mpp->ghost_delay_tick <= 0))) {
		/* if wait_for_udev is set and valid paths exist */
		condlog(3, "%s: delaying path addition until %s is fully initialized",
			pp->dev, mpp->alias);
		mpp->wait_for_udev = 2;
		orphan_path(pp, "waiting for create to complete");
		return 0;
	}

	pp->mpp = mpp;
rescan:
	if (mpp) {
		condlog(4,"%s: adopting all paths for path %s",
			mpp->alias, pp->dev);
		if (adopt_paths(vecs->pathvec, mpp) || pp->mpp != mpp ||
		    find_slot(mpp->paths, pp) == -1)
			goto fail; /* leave path added to pathvec */

		verify_paths(mpp);
		mpp->action = ACT_RELOAD;
	} else {
		if (!should_multipath(pp, vecs->pathvec, vecs->mpvec)) {
			orphan_path(pp, "only one path");
			return 0;
		}
		condlog(4,"%s: creating new map", pp->dev);
		if ((mpp = add_map_with_path(vecs, pp, 1))) {
			mpp->action = ACT_CREATE;
			/*
			 * We don't depend on ACT_CREATE, as domap will
			 * set it to ACT_NOTHING when complete.
			 */
			start_waiter = 1;
		}
		else
			goto fail; /* leave path added to pathvec */
	}

	/* persistent reservation check*/
	mpath_pr_event_handle(pp);

	/* caller will push the map to the kernel later (merged uevents) */
	if (!need_do_map)
		return 0;

	if (!dm_map_present(mpp->alias)) {
		mpp->action = ACT_CREATE;
		start_waiter = 1;
	}
	/*
	 * push the map to the device-mapper
	 */
	if (setup_map(mpp, &params, vecs)) {
		condlog(0, "%s: failed to setup map for addition of new "
			"path %s", mpp->alias, pp->dev);
		goto fail_map;
	}
	/*
	 * reload the map for the multipath mapped device
	 */
	ret = domap(mpp, params, 1);
	while (ret == DOMAP_RETRY && retries-- > 0) {
		condlog(0, "%s: retry domap for addition of new "
			"path %s", mpp->alias, pp->dev);
		sleep(1);
		ret = domap(mpp, params, 1);
	}
	if (ret == DOMAP_FAIL || ret == DOMAP_RETRY) {
		condlog(0, "%s: failed in domap for addition of new "
			"path %s", mpp->alias, pp->dev);
		/*
		 * deal with asynchronous uevents :((
		 */
		if (mpp->action == ACT_RELOAD && retries-- > 0) {
			/* re-read the path list and retry from the top */
			condlog(0, "%s: ev_add_path sleep", mpp->alias);
			sleep(1);
			update_mpp_paths(mpp, vecs->pathvec);
			free(params);
			params = NULL;
			goto rescan;
		}
		else if (mpp->action == ACT_RELOAD)
			condlog(0, "%s: giving up reload", mpp->alias);
		else
			goto fail_map;
	}

	/* a freshly created map needs its event waiter started */
	if ((mpp->action == ACT_CREATE ||
	     (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
	    wait_for_events(mpp, vecs))
			goto fail_map;

	/*
	 * update our state from kernel regardless of create or reload
	 */
	if (setup_multipath(vecs, mpp))
		goto fail; /* if setup_multipath fails, it removes the map */

	sync_map_state(mpp);

	/* retries < 0 means we exhausted the reload retries above */
	if (retries >= 0) {
		condlog(2, "%s [%s]: path added to devmap %s",
			pp->dev, pp->dev_t, mpp->alias);
		return 0;
	} else
		goto fail;

fail_map:
	remove_map(mpp, vecs->pathvec, vecs->mpvec);
fail:
	orphan_path(pp, "failed to add path");
	return 1;
}
1274
/*
 * Handle a "remove" uevent for a path device: look the path up in
 * pathvec and hand it to ev_remove_path(). A path that is not found
 * is not an error (it may have been removed already).
 * Always returns 0.
 */
static int
uev_remove_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
{
	struct path *pp;

	condlog(3, "%s: remove path (uevent)", uev->kernel);
	delete_foreign(uev->udev);

	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();
	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
	if (pp)
		ev_remove_path(pp, vecs, need_do_map);
	lock_cleanup_pop(vecs->lock);
	if (!pp) /* Not an error; path might have been purged earlier */
		condlog(0, "%s: path already removed", uev->kernel);
	return 0;
}
1294
/*
 * Remove a path from its map, reloading the map without it, or flush
 * the whole map if this was its last path.
 *
 * Returns one of the REMOVE_PATH_* codes visible below:
 *  - REMOVE_PATH_SUCCESS: path removed (and freed)
 *  - REMOVE_PATH_DELAY: removal deferred (map is waiting for udev, or
 *    need_do_map == 0); the path stays in pathvec marked as removed
 *  - REMOVE_PATH_FAILURE: the map reload failed
 *  - REMOVE_PATH_MAP_ERROR: a map-level error occurred and the map was
 *    removed
 */
int
ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
{
	struct multipath * mpp;
	int i, retval = REMOVE_PATH_SUCCESS;
	char *params __attribute__((cleanup(cleanup_charp))) = NULL;

	/*
	 * avoid referring to the map of an orphaned path
	 */
	if ((mpp = pp->mpp)) {
		/*
		 * Mark the path as removed. In case of success, we
		 * will delete it for good. Otherwise, it will be deleted
		 * later, unless all attempts to reload this map fail.
		 */
		set_path_removed(pp);

		/*
		 * transform the mp->pg vector of vectors of paths
		 * into a mp->params string to feed the device-mapper
		 */
		if (update_mpp_paths(mpp, vecs->pathvec)) {
			condlog(0, "%s: failed to update paths",
				mpp->alias);
			goto fail;
		}

		/*
		 * we have to explicitly remove pp from mpp->paths,
		 * update_mpp_paths() doesn't do that.
		 */
		i = find_slot(mpp->paths, pp);
		if (i != -1)
			vector_del_slot(mpp->paths, i);

		/*
		 * remove the map IF removing the last path
		 */
		if (VECTOR_SIZE(mpp->paths) == 0) {
			char alias[WWID_SIZE];

			/*
			 * flush_map will fail if the device is open
			 */
			/* keep a copy of the alias; flush_map frees mpp */
			strlcpy(alias, mpp->alias, WWID_SIZE);
			if (mpp->flush_on_last_del == FLUSH_ENABLED) {
				condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
				mpp->retry_tick = 0;
				mpp->no_path_retry = NO_PATH_RETRY_FAIL;
				mpp->disable_queueing = 1;
				mpp->stat_map_failures++;
				dm_queue_if_no_path(mpp->alias, 0);
			}
			if (!flush_map(mpp, vecs, 1)) {
				condlog(2, "%s: removed map after"
					" removing all paths",
					alias);
				/* flush_map() has freed the path */
				goto out;
			}
			/*
			 * Not an error, continue
			 */
		}

		if (setup_map(mpp, &params, vecs)) {
			condlog(0, "%s: failed to setup map for"
				" removal of path %s", mpp->alias, pp->dev);
			goto fail;
		}

		/* defer the reload while the map is waiting for udev */
		if (mpp->wait_for_udev) {
			mpp->wait_for_udev = 2;
			retval = REMOVE_PATH_DELAY;
			goto out;
		}

		if (!need_do_map) {
			retval = REMOVE_PATH_DELAY;
			goto out;
		}
		/*
		 * reload the map
		 */
		mpp->action = ACT_RELOAD;
		if (domap(mpp, params, 1) == DOMAP_FAIL) {
			condlog(0, "%s: failed in domap for "
				"removal of path %s",
				mpp->alias, pp->dev);
			retval = REMOVE_PATH_FAILURE;
		} else {
			/*
			 * update our state from kernel
			 */
			char devt[BLK_DEV_SIZE];

			/* copy dev_t before pp may be freed below */
			strlcpy(devt, pp->dev_t, sizeof(devt));

			/* setup_multipath will free the path
			 * regardless of whether it succeeds or
			 * fails */
			if (setup_multipath(vecs, mpp))
				return REMOVE_PATH_MAP_ERROR;
			sync_map_state(mpp);

			condlog(2, "%s: path removed from map %s",
				devt, mpp->alias);
		}
	} else {
		/* mpp == NULL */
		/* orphaned path: just drop it from pathvec and free it */
		if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
			vector_del_slot(vecs->pathvec, i);
		free_path(pp);
	}
out:
	return retval;

fail:
	condlog(0, "%s: error removing path. removing map %s", pp->dev,
		mpp->alias);
	remove_map_and_stop_waiter(mpp, vecs);
	return REMOVE_PATH_MAP_ERROR;
}
1419
/*
 * Complete the initialization of a partially initialized path once its
 * udev database entry is ready.
 *
 * Returns 0 if udev still isn't ready (try again later) or pathinfo
 * succeeded; -1 if pathinfo failed, in which case the path is removed.
 */
int
finish_path_init(struct path *pp, struct vectors * vecs)
{
	int r;
	struct config *conf;

	/* still waiting for udev to initialize the device entry */
	if (pp->udev && pp->uid_attribute && *pp->uid_attribute &&
	    !udev_device_get_is_initialized(pp->udev))
		return 0;
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	r = pathinfo(pp, conf, DI_ALL|DI_BLACKLIST);
	pthread_cleanup_pop(1);

	if (r == PATHINFO_OK)
		return 0;

	condlog(0, "%s: error fully initializing path, removing", pp->dev);
	ev_remove_path(pp, vecs, 1);
	return -1;
}
1441
1442 static int
1443 uev_update_path (struct uevent *uev, struct vectors * vecs)
1444 {
1445         int ro, retval = 0, rc;
1446         struct path * pp;
1447         struct config *conf;
1448         int needs_reinit = 0;
1449
1450         switch ((rc = change_foreign(uev->udev))) {
1451         case FOREIGN_OK:
1452                 /* known foreign path, ignore event */
1453                 return 0;
1454         case FOREIGN_IGNORED:
1455                 break;
1456         case FOREIGN_ERR:
1457                 condlog(3, "%s: error in change_foreign", __func__);
1458                 break;
1459         default:
1460                 condlog(1, "%s: return code %d of change_forein is unsupported",
1461                         __func__, rc);
1462                 break;
1463         }
1464
1465         pthread_cleanup_push(cleanup_lock, &vecs->lock);
1466         lock(&vecs->lock);
1467         pthread_testcancel();
1468
1469         pp = find_path_by_dev(vecs->pathvec, uev->kernel);
1470         if (pp) {
1471                 struct multipath *mpp = pp->mpp;
1472                 char wwid[WWID_SIZE];
1473
1474                 if (pp->initialized == INIT_REQUESTED_UDEV) {
1475                         needs_reinit = 1;
1476                         goto out;
1477                 }
1478                 /* Don't deal with other types of failed initialization
1479                  * now. check_path will handle it */
1480                 if (!strlen(pp->wwid) && pp->initialized != INIT_PARTIAL)
1481                         goto out;
1482
1483                 strcpy(wwid, pp->wwid);
1484                 rc = get_uid(pp, pp->state, uev->udev, 0);
1485
1486                 if (rc != 0)
1487                         strcpy(pp->wwid, wwid);
1488                 else if (strlen(wwid) &&
1489                          strncmp(wwid, pp->wwid, WWID_SIZE) != 0) {
1490                         condlog(0, "%s: path wwid changed from '%s' to '%s'",
1491                                 uev->kernel, wwid, pp->wwid);
1492                         ev_remove_path(pp, vecs, 1);
1493                         needs_reinit = 1;
1494                         goto out;
1495                 } else if (pp->initialized == INIT_PARTIAL) {
1496                         udev_device_unref(pp->udev);
1497                         pp->udev = udev_device_ref(uev->udev);
1498                         if (finish_path_init(pp, vecs) < 0) {
1499                                 retval = 1;
1500                                 goto out;
1501                         }
1502                 } else {
1503                         udev_device_unref(pp->udev);
1504                         pp->udev = udev_device_ref(uev->udev);
1505                         conf = get_multipath_config();
1506                         pthread_cleanup_push(put_multipath_config, conf);
1507                         if (pathinfo(pp, conf, DI_SYSFS|DI_NOIO) != PATHINFO_OK)
1508                                 condlog(1, "%s: pathinfo failed after change uevent",
1509                                         uev->kernel);
1510                         pthread_cleanup_pop(1);
1511                 }
1512
1513                 ro = uevent_get_disk_ro(uev);
1514                 if (mpp && ro >= 0) {
1515                         condlog(2, "%s: update path write_protect to '%d' (uevent)", uev->kernel, ro);
1516
1517                         if (mpp->wait_for_udev)
1518                                 mpp->wait_for_udev = 2;
1519                         else {
1520                                 if (ro == 1)
1521                                         pp->mpp->force_readonly = 1;
1522                                 retval = reload_and_sync_map(mpp, vecs, 0);
1523                                 if (retval == 2)
1524                                         condlog(2, "%s: map removed during reload", pp->dev);
1525                                 else {
1526                                         pp->mpp->force_readonly = 0;
1527                                         condlog(2, "%s: map %s reloaded (retval %d)", uev->kernel, mpp->alias, retval);
1528                                 }
1529                         }
1530                 }
1531         }
1532 out:
1533         lock_cleanup_pop(vecs->lock);
1534         if (!pp) {
1535                 /* If the path is blacklisted, print a debug/non-default verbosity message. */
1536                 if (uev->udev) {
1537                         int flag = DI_SYSFS | DI_WWID;
1538
1539                         conf = get_multipath_config();
1540                         pthread_cleanup_push(put_multipath_config, conf);
1541                         retval = alloc_path_with_pathinfo(conf, uev->udev, uev->wwid, flag, NULL);
1542                         pthread_cleanup_pop(1);
1543
1544                         if (retval == PATHINFO_SKIPPED) {
1545                                 condlog(3, "%s: spurious uevent, path is blacklisted", uev->kernel);
1546                                 return 0;
1547                         }
1548                 }
1549
1550                 condlog(0, "%s: spurious uevent, path not found", uev->kernel);
1551         }
1552         /* pp->initalized must not be INIT_PARTIAL if needs_reinit is set */
1553         if (needs_reinit)
1554                 retval = uev_add_path(uev, vecs, 1);
1555         return retval;
1556 }
1557
/*
 * Inspect a dm uevent for a DM_ACTION=PATH_FAILED notification and, if
 * present, hand the failed path (identified by DM_PATH) to the
 * io_err_stat subsystem for path IO error accounting.
 *
 * Returns 0 if the event was handled, 1 otherwise.
 */
static int
uev_pathfail_check(struct uevent *uev, struct vectors *vecs)
{
	char *action = NULL, *devt = NULL;
	struct path *pp;
	int r = 1;

	action = uevent_get_dm_action(uev);
	if (!action)
		return 1;
	if (strncmp(action, "PATH_FAILED", 11))
		goto out;
	devt = uevent_get_dm_path(uev);
	if (!devt) {
		condlog(3, "%s: No DM_PATH in uevent", uev->kernel);
		goto out;
	}

	pthread_cleanup_push(cleanup_lock, &vecs->lock);
	lock(&vecs->lock);
	pthread_testcancel();
	pp = find_path_by_devt(vecs->pathvec, devt);
	if (!pp)
		goto out_lock;
	r = io_err_stat_handle_pathfail(pp);
	if (r)
		condlog(3, "io_err_stat: %s: cannot handle pathfail uevent",
				pp->dev);
out_lock:
	lock_cleanup_pop(vecs->lock);
	free(devt);
	free(action);
	return r;
out:
	/* early exit: devt was not allocated on this path */
	free(action);
	return 1;
}
1595
/*
 * Populate mpvec with the multipath maps already present in the device
 * mapper, dropping any map whose table cannot be parsed.
 *
 * Returns 1 if the map list could not be read from dm, 0 otherwise.
 */
static int
map_discovery (struct vectors * vecs)
{
	struct multipath * mpp;
	unsigned int i;

	if (dm_get_maps(vecs->mpvec))
		return 1;

	vector_foreach_slot (vecs->mpvec, mpp, i)
		if (update_multipath_table(mpp, vecs->pathvec, 0) != DMP_OK) {
			remove_map(mpp, vecs->pathvec, vecs->mpvec);
			/* compensate for the slot removed from mpvec */
			i--;
		}

	return 0;
}
1613
/*
 * Main uevent handler, invoked by the uevent dispatcher thread for
 * each (possibly merged) uevent.
 *
 * Blocks until the daemon reaches a state in which events may be
 * processed (idle, running, or shutdown; events are dropped on
 * shutdown). "dm-*" events are handled as map events; all other events
 * are path add/remove/change events. Returns the accumulated error
 * count (0 on success).
 */
int
uev_trigger (struct uevent * uev, void * trigger_data)
{
	int r = 0;
	struct vectors * vecs;
	struct uevent *merge_uev, *tmp;
	enum daemon_status state;

	vecs = (struct vectors *)trigger_data;

	/* wait until the daemon state allows processing uevents */
	pthread_cleanup_push(config_cleanup, NULL);
	pthread_mutex_lock(&config_lock);
	while (running_state != DAEMON_IDLE &&
	       running_state != DAEMON_RUNNING &&
	       running_state != DAEMON_SHUTDOWN)
		pthread_cond_wait(&config_cond, &config_lock);
	state = running_state;
	pthread_cleanup_pop(1);

	if (state == DAEMON_SHUTDOWN)
		return 0;

	/*
	 * device map event
	 * Add events are ignored here as the tables
	 * are not fully initialised then.
	 */
	if (!strncmp(uev->kernel, "dm-", 3)) {
		/* non-multipath dm devices are handled by the foreign code */
		if (!uevent_is_mpath(uev)) {
			if (!strncmp(uev->action, "change", 6))
				(void)add_foreign(uev->udev);
			else if (!strncmp(uev->action, "remove", 6))
				(void)delete_foreign(uev->udev);
			goto out;
		}
		if (!strncmp(uev->action, "change", 6)) {
			r = uev_add_map(uev, vecs);

			/*
			 * the kernel-side dm-mpath issues a PATH_FAILED event
			 * when it encounters a path IO error. It is reason-
			 * able be the entry of path IO error accounting pro-
			 * cess.
			 */
			uev_pathfail_check(uev, vecs);
		} else if (!strncmp(uev->action, "remove", 6)) {
			r = uev_remove_map(uev, vecs);
		}
		goto out;
	}

	/*
	 * path add/remove/change event, add/remove maybe merged
	 */
	/* merged events are handled without reloading (need_do_map == 0) */
	list_for_each_entry_safe(merge_uev, tmp, &uev->merge_node, node) {
		if (!strncmp(merge_uev->action, "add", 3))
			r += uev_add_path(merge_uev, vecs, 0);
		if (!strncmp(merge_uev->action, "remove", 6))
			r += uev_remove_path(merge_uev, vecs, 0);
	}

	/* the final event triggers the actual map reload (need_do_map == 1) */
	if (!strncmp(uev->action, "add", 3))
		r += uev_add_path(uev, vecs, 1);
	if (!strncmp(uev->action, "remove", 6))
		r += uev_remove_path(uev, vecs, 1);
	if (!strncmp(uev->action, "change", 6))
		r += uev_update_path(uev, vecs);

out:
	return r;
}
1685
/* pthread cleanup handler: unregister the exiting thread from liburcu */
static void rcu_unregister(__attribute__((unused)) void *param)
{
	rcu_unregister_thread();
}
1690
1691 static void *
1692 ueventloop (void * ap)
1693 {
1694         struct udev *udev = ap;
1695
1696         pthread_cleanup_push(rcu_unregister, NULL);
1697         rcu_register_thread();
1698         if (uevent_listen(udev))
1699                 condlog(0, "error starting uevent listener");
1700         pthread_cleanup_pop(1);
1701         return NULL;
1702 }
1703
1704 static void *
1705 uevqloop (void * ap)
1706 {
1707         pthread_cleanup_push(rcu_unregister, NULL);
1708         rcu_register_thread();
1709         if (uevent_dispatch(&uev_trigger, ap))
1710                 condlog(0, "error starting uevent dispatcher");
1711         pthread_cleanup_pop(1);
1712         return NULL;
1713 }
/*
 * CLI listener thread entry point: creates the unix-domain command
 * socket, initializes the CLI handler table, signals the main thread to
 * start the initial configuration, and then serves client requests
 * until shutdown. Any setup failure asks the daemon to exit.
 */
static void *
uxlsnrloop (void * ap)
{
	long ux_sock;

	pthread_cleanup_push(rcu_unregister, NULL);
	rcu_register_thread();

	ux_sock = ux_socket_listen(DEFAULT_SOCKET);
	if (ux_sock == -1) {
		condlog(1, "could not create uxsock: %d", errno);
		exit_daemon();
		goto out;
	}
	/* socket fd is smuggled through the void* cleanup argument */
	pthread_cleanup_push(uxsock_cleanup, (void *)ux_sock);

	if (cli_init()) {
		condlog(1, "Failed to init uxsock listener");
		exit_daemon();
		goto out_sock;
	}

	/* Tell main thread that thread has started */
	post_config_state(DAEMON_CONFIGURE);

	init_handler_callbacks();
	umask(077);

	/*
	 * Wait for initial reconfiguration to finish, while
	 * handling signals
	 */
	while (wait_for_state_change_if(DAEMON_CONFIGURE, 50)
	       == DAEMON_CONFIGURE)
		handle_signals(false);

	/* main service loop; returns only on shutdown */
	uxsock_listen(ux_sock, ap);

out_sock:
	pthread_cleanup_pop(1); /* uxsock_cleanup */
out:
	pthread_cleanup_pop(1); /* rcu_unregister */
	return NULL;
}
1758
/* Request daemon termination by switching to the SHUTDOWN state */
void
exit_daemon (void)
{
	post_config_state(DAEMON_SHUTDOWN);
}
1764
1765 static void
1766 fail_path (struct path * pp, int del_active)
1767 {
1768         if (!pp->mpp)
1769                 return;
1770
1771         condlog(2, "checker failed path %s in map %s",
1772                  pp->dev_t, pp->mpp->alias);
1773
1774         dm_fail_path(pp->mpp->alias, pp->dev_t);
1775         if (del_active)
1776                 update_queue_mode_del_path(pp->mpp);
1777 }
1778
1779 /*
1780  * caller must have locked the path list before calling that function
1781  */
1782 static void
1783 reinstate_path (struct path * pp)
1784 {
1785         if (!pp->mpp)
1786                 return;
1787
1788         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
1789                 condlog(0, "%s: reinstate failed", pp->dev_t);
1790         else {
1791                 condlog(2, "%s: reinstated", pp->dev_t);
1792                 update_queue_mode_add_path(pp->mpp);
1793         }
1794 }
1795
1796 static void
1797 enable_group(struct path * pp)
1798 {
1799         struct pathgroup * pgp;
1800
1801         /*
1802          * if path is added through uev_add_path, pgindex can be unset.
1803          * next update_strings() will set it, upon map reload event.
1804          *
1805          * we can safely return here, because upon map reload, all
1806          * PG will be enabled.
1807          */
1808         if (!pp->mpp->pg || !pp->pgindex)
1809                 return;
1810
1811         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1812
1813         if (pgp->status == PGSTATE_DISABLED) {
1814                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1815                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
1816         }
1817 }
1818
1819 static void
1820 mpvec_garbage_collector (struct vectors * vecs)
1821 {
1822         struct multipath * mpp;
1823         unsigned int i;
1824
1825         if (!vecs->mpvec)
1826                 return;
1827
1828         vector_foreach_slot (vecs->mpvec, mpp, i) {
1829                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1830                         condlog(2, "%s: remove dead map", mpp->alias);
1831                         remove_map_and_stop_waiter(mpp, vecs);
1832                         i--;
1833                 }
1834         }
1835 }
1836
1837 /* This is called after a path has started working again. It the multipath
1838  * device for this path uses the followover failback type, and this is the
1839  * best pathgroup, and this is the first path in the pathgroup to come back
1840  * up, then switch to this pathgroup */
1841 static int
1842 followover_should_failback(struct path * pp)
1843 {
1844         struct pathgroup * pgp;
1845         struct path *pp1;
1846         int i;
1847
1848         if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1849             !pp->mpp->pg || !pp->pgindex ||
1850             pp->pgindex != pp->mpp->bestpg)
1851                 return 0;
1852
1853         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1854         vector_foreach_slot(pgp->paths, pp1, i) {
1855                 if (pp1 == pp)
1856                         continue;
1857                 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
1858                         return 0;
1859         }
1860         return 1;
1861 }
1862
/*
 * Per-tick countdown for maps still waiting on their creation uevent.
 * When the wait times out, reloads are re-enabled for the map, and a
 * delayed reconfigure is scheduled if one was pending.
 */
static void
missing_uev_wait_tick(struct vectors *vecs)
{
	struct multipath * mpp;
	unsigned int i;
	int timed_out = 0;

	vector_foreach_slot (vecs->mpvec, mpp, i) {
		if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
			timed_out = 1;
			condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
			/* wait_for_udev > 1 means a reload was deferred;
			 * replay it now that waiting is over */
			if (mpp->wait_for_udev > 1 &&
			    update_map(mpp, vecs, 0)) {
				/* update_map removed map; compensate the
				 * iterator for the deleted slot */
				i--;
				continue;
			}
			mpp->wait_for_udev = 0;
		}
	}

	if (timed_out && get_delayed_reconfig() &&
	    !need_to_delay_reconfig(vecs)) {
		condlog(2, "reconfigure (delayed)");
		schedule_reconfigure(FORCE_RELOAD_WEAK);
	}
}
1890
/*
 * Per-tick countdown for maps delaying activation while they have only
 * ghost paths. When the delay expires, force a udev reload of the map.
 */
static void
ghost_delay_tick(struct vectors *vecs)
{
	struct multipath * mpp;
	unsigned int i;

	vector_foreach_slot (vecs->mpvec, mpp, i) {
		/* <= 0 means no ghost delay pending for this map */
		if (mpp->ghost_delay_tick <= 0)
			continue;
		if (--mpp->ghost_delay_tick <= 0) {
			condlog(0, "%s: timed out waiting for active path",
				mpp->alias);
			mpp->force_udev_reload = 1;
			if (update_map(mpp, vecs, 0) != 0) {
				/* update_map removed map; compensate the
				 * iterator for the deleted slot */
				i--;
				continue;
			}
		}
	}
}
1912
1913 static void
1914 defered_failback_tick (vector mpvec)
1915 {
1916         struct multipath * mpp;
1917         unsigned int i;
1918
1919         vector_foreach_slot (mpvec, mpp, i) {
1920                 /*
1921                  * deferred failback getting sooner
1922                  */
1923                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1924                         mpp->failback_tick--;
1925
1926                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1927                                 switch_pathgroup(mpp);
1928                 }
1929         }
1930 }
1931
1932 static void
1933 retry_count_tick(vector mpvec)
1934 {
1935         struct multipath *mpp;
1936         unsigned int i;
1937
1938         vector_foreach_slot (mpvec, mpp, i) {
1939                 if (mpp->retry_tick > 0) {
1940                         mpp->stat_total_queueing_time++;
1941                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
1942                         if(--mpp->retry_tick == 0) {
1943                                 mpp->stat_map_failures++;
1944                                 dm_queue_if_no_path(mpp->alias, 0);
1945                                 condlog(2, "%s: Disable queueing", mpp->alias);
1946                         }
1947                 }
1948         }
1949 }
1950
1951 static void
1952 partial_retrigger_tick(vector pathvec)
1953 {
1954         struct path *pp;
1955         unsigned int i;
1956
1957         vector_foreach_slot (pathvec, pp, i) {
1958                 if (pp->initialized == INIT_PARTIAL && pp->udev &&
1959                     pp->partial_retrigger_delay > 0 &&
1960                     --pp->partial_retrigger_delay == 0) {
1961                         const char *msg = udev_device_get_is_initialized(pp->udev) ?
1962                                           "change" : "add";
1963
1964                         sysfs_attr_set_value(pp->udev, "uevent", msg,
1965                                              strlen(msg));
1966                 }
1967         }
1968 }
1969
/*
 * Re-read the priority of @pp via pathinfo(DI_PRIO); with @refresh_all
 * set, refresh every path in every pathgroup of pp->mpp instead.
 * Returns 1 if any priority changed, 0 otherwise.
 */
int update_prio(struct path *pp, int refresh_all)
{
	int oldpriority;
	struct path *pp1;
	struct pathgroup * pgp;
	int i, j, changed = 0;
	struct config *conf;

	if (refresh_all) {
		vector_foreach_slot (pp->mpp->pg, pgp, i) {
			vector_foreach_slot (pgp->paths, pp1, j) {
				oldpriority = pp1->priority;
				conf = get_multipath_config();
				/* cleanup handler drops the config ref even
				 * if pathinfo is cancelled */
				pthread_cleanup_push(put_multipath_config,
						     conf);
				pathinfo(pp1, conf, DI_PRIO);
				pthread_cleanup_pop(1);
				if (pp1->priority != oldpriority)
					changed = 1;
			}
		}
		return changed;
	}
	oldpriority = pp->priority;
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	/* don't query priority of a path known to be down */
	if (pp->state != PATH_DOWN)
		pathinfo(pp, conf, DI_PRIO);
	pthread_cleanup_pop(1);

	if (pp->priority == oldpriority)
		return 0;
	return 1;
}
2004
/*
 * Rebuild the device-mapper table for @mpp from the current pathvec and
 * load it into the kernel. If @refresh is set, path priorities are
 * re-read first. Returns 0 on success, 1 on failure.
 */
static int reload_map(struct vectors *vecs, struct multipath *mpp, int refresh,
		      int is_daemon)
{
	/* freed automatically on every return path via cleanup attribute */
	char *params __attribute__((cleanup(cleanup_charp))) = NULL;
	struct path *pp;
	int i, r;

	update_mpp_paths(mpp, vecs->pathvec);
	if (refresh) {
		vector_foreach_slot (mpp->paths, pp, i) {
			struct config *conf = get_multipath_config();
			pthread_cleanup_push(put_multipath_config, conf);
			r = pathinfo(pp, conf, DI_PRIO);
			pthread_cleanup_pop(1);
			if (r) {
				condlog(2, "%s: failed to refresh pathinfo",
					mpp->alias);
				return 1;
			}
		}
	}
	if (setup_map(mpp, &params, vecs)) {
		condlog(0, "%s: failed to setup map", mpp->alias);
		return 1;
	}
	select_action(mpp, vecs->mpvec, 1);

	r = domap(mpp, params, is_daemon);
	if (r == DOMAP_FAIL || r == DOMAP_RETRY) {
		condlog(3, "%s: domap (%u) failure "
			"for reload map", mpp->alias, r);
		return 1;
	}

	return 0;
}
2041
/*
 * Reload the dm table of @mpp (optionally refreshing path priorities),
 * then resynchronize the daemon's view of the map with the kernel.
 * Returns 0 on success, 1 if the reload failed, 2 if the post-reload
 * setup failed.
 */
int reload_and_sync_map(struct multipath *mpp,
			struct vectors *vecs, int refresh)
{
	int rc;

	rc = reload_map(vecs, mpp, refresh, 1);
	if (rc != 0)
		return 1;
	rc = setup_multipath(vecs, mpp);
	if (rc != 0)
		return 2;
	sync_map_state(mpp);

	return 0;
}
2053
/*
 * san_path_err_* accounting: track path failure flaps and decide whether
 * a recovering path should be held back (delayed) instead of reinstated.
 * Returns 1 if reinstatement must be delayed, 0 if the path may be
 * reinstated normally.
 */
static int check_path_reinstate_state(struct path * pp) {
	struct timespec curr_time;

	/*
	 * This function is only called when the path state changes
	 * from "bad" to "good". pp->state reflects the *previous* state.
	 * If this was "bad", we know that a failure must have occurred
	 * beforehand, and count that.
	 * Note that we count path state _changes_ this way. If a path
	 * remains in "bad" state, failure count is not increased.
	 */

	/* feature disabled unless all three tunables are configured */
	if (!((pp->mpp->san_path_err_threshold > 0) &&
				(pp->mpp->san_path_err_forget_rate > 0) &&
				(pp->mpp->san_path_err_recovery_time >0))) {
		return 0;
	}

	if (pp->disable_reinstate) {
		/* If there are no other usable paths, reinstate the path */
		if (count_active_paths(pp->mpp) == 0) {
			condlog(2, "%s : reinstating path early", pp->dev);
			goto reinstate_path;
		}
		get_monotonic_time(&curr_time);

		/* If path became failed again or continued failed, reset
		 * path san_path_err_forget_rate and path dis_reinstate_time to
		 * start a new stable check.
		 */
		if ((pp->state != PATH_UP) && (pp->state != PATH_GHOST) &&
			(pp->state != PATH_DELAYED)) {
			pp->san_path_err_forget_rate =
				pp->mpp->san_path_err_forget_rate;
			pp->dis_reinstate_time = curr_time.tv_sec;
		}

		if ((curr_time.tv_sec - pp->dis_reinstate_time ) > pp->mpp->san_path_err_recovery_time) {
			condlog(2,"%s : reinstate the path after err recovery time", pp->dev);
			goto reinstate_path;
		}
		return 1;
	}
	/* forget errors on a working path */
	if ((pp->state == PATH_UP || pp->state == PATH_GHOST) &&
			pp->path_failures > 0) {
		if (pp->san_path_err_forget_rate > 0){
			pp->san_path_err_forget_rate--;
		} else {
			/* for every san_path_err_forget_rate number of
			 * successful path checks decrement path_failures by 1
			 */
			pp->path_failures--;
			pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
		}
		return 0;
	}

	/* If the path isn't recovering from a failed state, do nothing */
	if (pp->state != PATH_DOWN && pp->state != PATH_SHAKY &&
			pp->state != PATH_TIMEOUT)
		return 0;

	/* first failure arms a fresh forget-rate window */
	if (pp->path_failures == 0)
		pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;

	pp->path_failures++;

	/* if we don't know the current time, we don't know how long to
	 * delay the path, so there's no point in checking if we should
	 */

	get_monotonic_time(&curr_time);
	/* when path failures have exceeded the san_path_err_threshold,
	 * place the path in delayed state till san_path_err_recovery_time
	 * so that the customer can rectify the issue within this time. After
	 * the completion of san_path_err_recovery_time it should
	 * automatically reinstate the path
	 * (note: we know that san_path_err_threshold > 0 here).
	 */
	if (pp->path_failures > (unsigned int)pp->mpp->san_path_err_threshold) {
		condlog(2, "%s : hit error threshold. Delaying path reinstatement", pp->dev);
		pp->dis_reinstate_time = curr_time.tv_sec;
		pp->disable_reinstate = 1;

		return 1;
	} else {
		return 0;
	}

reinstate_path:
	/* clear all flap-accounting state and allow reinstatement */
	pp->path_failures = 0;
	pp->disable_reinstate = 0;
	pp->san_path_err_forget_rate = 0;
	return 0;
}
2150
2151 static int
2152 should_skip_path(struct path *pp){
2153         if (marginal_path_check_enabled(pp->mpp)) {
2154                 if (pp->io_err_disable_reinstate && need_io_err_check(pp))
2155                         return 1;
2156         } else if (san_path_check_enabled(pp->mpp)) {
2157                 if (check_path_reinstate_state(pp))
2158                         return 1;
2159         }
2160         return 0;
2161 }
2162
2163 /*
2164  * Returns '1' if the path has been checked, '-1' if it was blacklisted
2165  * and '0' otherwise
2166  */
2167 int
2168 check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
2169 {
2170         int newstate;
2171         int new_path_up = 0;
2172         int chkr_new_path_up = 0;
2173         int disable_reinstate = 0;
2174         int oldchkrstate = pp->chkrstate;
2175         int retrigger_tries;
2176         unsigned int checkint, max_checkint;
2177         struct config *conf;
2178         int marginal_pathgroups, marginal_changed = 0;
2179         int ret;
2180
2181         if (((pp->initialized == INIT_OK || pp->initialized == INIT_PARTIAL ||
2182               pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp) ||
2183             pp->initialized == INIT_REMOVED)
2184                 return 0;
2185
2186         if (pp->tick)
2187                 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
2188         if (pp->tick)
2189                 return 0; /* don't check this path yet */
2190
2191         conf = get_multipath_config();
2192         retrigger_tries = conf->retrigger_tries;
2193         checkint = conf->checkint;
2194         max_checkint = conf->max_checkint;
2195         marginal_pathgroups = conf->marginal_pathgroups;
2196         put_multipath_config(conf);
2197
2198         if (pp->checkint == CHECKINT_UNDEF) {
2199                 condlog(0, "%s: BUG: checkint is not set", pp->dev);
2200                 pp->checkint = checkint;
2201         };
2202
2203         if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV) {
2204                 if (pp->retriggers < retrigger_tries) {
2205                         condlog(2, "%s: triggering change event to reinitialize",
2206                                 pp->dev);
2207                         pp->initialized = INIT_REQUESTED_UDEV;
2208                         pp->retriggers++;
2209                         sysfs_attr_set_value(pp->udev, "uevent", "change",
2210                                              strlen("change"));
2211                         return 0;
2212                 } else {
2213                         condlog(1, "%s: not initialized after %d udev retriggers",
2214                                 pp->dev, retrigger_tries);
2215                         /*
2216                          * Make sure that the "add missing path" code path
2217                          * below may reinstate the path later, if it ever
2218                          * comes up again.
2219                          * The WWID needs not be cleared; if it was set, the
2220                          * state hadn't been INIT_MISSING_UDEV in the first
2221                          * place.
2222                          */
2223                         pp->initialized = INIT_FAILED;
2224                         return 0;
2225                 }
2226         }
2227
2228         /*
2229          * provision a next check soonest,
2230          * in case we exit abnormaly from here
2231          */
2232         pp->tick = checkint;
2233
2234         newstate = path_offline(pp);
2235         if (newstate == PATH_UP) {
2236                 conf = get_multipath_config();
2237                 pthread_cleanup_push(put_multipath_config, conf);
2238                 newstate = get_state(pp, conf, 1, newstate);
2239                 pthread_cleanup_pop(1);
2240         } else {
2241                 checker_clear_message(&pp->checker);
2242                 condlog(3, "%s: state %s, checker not called",
2243                         pp->dev, checker_state_name(newstate));
2244         }
2245         /*
2246          * Wait for uevent for removed paths;
2247          * some LLDDs like zfcp keep paths unavailable
2248          * without sending uevents.
2249          */
2250         if (newstate == PATH_REMOVED)
2251                 newstate = PATH_DOWN;
2252
2253         if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
2254                 condlog(2, "%s: unusable path (%s) - checker failed",
2255                         pp->dev, checker_state_name(newstate));
2256                 LOG_MSG(2, pp);
2257                 conf = get_multipath_config();
2258                 pthread_cleanup_push(put_multipath_config, conf);
2259                 pathinfo(pp, conf, 0);
2260                 pthread_cleanup_pop(1);
2261                 return 1;
2262         } else if ((newstate != PATH_UP && newstate != PATH_GHOST &&
2263                     newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) {
2264                 /* If path state become failed again cancel path delay state */
2265                 pp->state = newstate;
2266                 /*
2267                  * path state bad again should change the check interval time
2268                  * to the shortest delay
2269                  */
2270                 pp->checkint = checkint;
2271                 return 1;
2272         }
2273         if (!pp->mpp) {
2274                 if (!strlen(pp->wwid) &&
2275                     (pp->initialized == INIT_FAILED ||
2276                      pp->initialized == INIT_NEW) &&
2277                     (newstate == PATH_UP || newstate == PATH_GHOST)) {
2278                         condlog(2, "%s: add missing path", pp->dev);
2279                         conf = get_multipath_config();
2280                         pthread_cleanup_push(put_multipath_config, conf);
2281                         ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
2282                         pthread_cleanup_pop(1);
2283                         /* INIT_OK implies ret == PATHINFO_OK */
2284                         if (pp->initialized == INIT_OK) {
2285                                 ev_add_path(pp, vecs, 1);
2286                                 pp->tick = 1;
2287                         } else {
2288                                 if (ret == PATHINFO_SKIPPED)
2289                                         return -1;
2290                                 /*
2291                                  * We failed multiple times to initialize this
2292                                  * path properly. Don't re-check too often.
2293                                  */
2294                                 pp->checkint = max_checkint;
2295                         }
2296                 }
2297                 return 0;
2298         }
2299         /*
2300          * Async IO in flight. Keep the previous path state
2301          * and reschedule as soon as possible
2302          */
2303         if (newstate == PATH_PENDING) {
2304                 pp->tick = 1;
2305                 return 0;
2306         }
2307         /*
2308          * Synchronize with kernel state
2309          */
2310         ret = update_multipath_strings(pp->mpp, vecs->pathvec);
2311         if (ret != DMP_OK) {
2312                 if (ret == DMP_NOT_FOUND) {
2313                         /* multipath device missing. Likely removed */
2314                         condlog(1, "%s: multipath device '%s' not found",
2315                                 pp->dev, pp->mpp ? pp->mpp->alias : "");
2316                         return 0;
2317                 } else
2318                         condlog(1, "%s: Couldn't synchronize with kernel state",
2319                                 pp->dev);
2320                 pp->dmstate = PSTATE_UNDEF;
2321         }
2322         /* if update_multipath_strings orphaned the path, quit early */
2323         if (!pp->mpp)
2324                 return 0;
2325         set_no_path_retry(pp->mpp);
2326
2327         if (pp->recheck_wwid == RECHECK_WWID_ON &&
2328             (newstate == PATH_UP || newstate == PATH_GHOST) &&
2329             ((pp->state != PATH_UP && pp->state != PATH_GHOST) ||
2330              pp->dmstate == PSTATE_FAILED) &&
2331             check_path_wwid_change(pp)) {
2332                 condlog(0, "%s: path wwid change detected. Removing", pp->dev);
2333                 handle_path_wwid_change(pp, vecs);
2334                 return 0;
2335         }
2336
2337         if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
2338             (san_path_check_enabled(pp->mpp) ||
2339              marginal_path_check_enabled(pp->mpp))) {
2340                 if (should_skip_path(pp)) {
2341                         if (!pp->marginal && pp->state != PATH_DELAYED)
2342                                 condlog(2, "%s: path is now marginal", pp->dev);
2343                         if (!marginal_pathgroups) {
2344                                 if (marginal_path_check_enabled(pp->mpp))
2345                                         /* to reschedule as soon as possible,
2346                                          * so that this path can be recovered
2347                                          * in time */
2348                                         pp->tick = 1;
2349                                 pp->state = PATH_DELAYED;
2350                                 return 1;
2351                         }
2352                         if (!pp->marginal) {
2353                                 pp->marginal = 1;
2354                                 marginal_changed = 1;
2355                         }
2356                 } else {
2357                         if (pp->marginal || pp->state == PATH_DELAYED)
2358                                 condlog(2, "%s: path is no longer marginal",
2359                                         pp->dev);
2360                         if (marginal_pathgroups && pp->marginal) {
2361                                 pp->marginal = 0;
2362                                 marginal_changed = 1;
2363                         }
2364                 }
2365         }
2366
2367         /*
2368          * don't reinstate failed path, if its in stand-by
2369          * and if target supports only implicit tpgs mode.
2370          * this will prevent unnecessary i/o by dm on stand-by
2371          * paths if there are no other active paths in map.
2372          */
2373         disable_reinstate = (newstate == PATH_GHOST &&
2374                              count_active_paths(pp->mpp) == 0 &&
2375                              path_get_tpgs(pp) == TPGS_IMPLICIT) ? 1 : 0;
2376
2377         pp->chkrstate = newstate;
2378         if (newstate != pp->state) {
2379                 int oldstate = pp->state;
2380                 pp->state = newstate;
2381
2382                 LOG_MSG(1, pp);
2383
2384                 /*
2385                  * upon state change, reset the checkint
2386                  * to the shortest delay
2387                  */
2388                 conf = get_multipath_config();
2389                 pp->checkint = conf->checkint;
2390                 put_multipath_config(conf);
2391
2392                 if (newstate != PATH_UP && newstate != PATH_GHOST) {
2393                         /*
2394                          * proactively fail path in the DM
2395                          */
2396                         if (oldstate == PATH_UP ||
2397                             oldstate == PATH_GHOST)
2398                                 fail_path(pp, 1);
2399                         else
2400                                 fail_path(pp, 0);
2401
2402                         /*
2403                          * cancel scheduled failback
2404                          */
2405                         pp->mpp->failback_tick = 0;
2406
2407                         pp->mpp->stat_path_failures++;
2408                         return 1;
2409                 }
2410
2411                 if (newstate == PATH_UP || newstate == PATH_GHOST) {
2412                         if (pp->mpp->prflag) {
2413                                 /*
2414                                  * Check Persistent Reservation.
2415                                  */
2416                                 condlog(2, "%s: checking persistent "
2417                                         "reservation registration", pp->dev);
2418                                 mpath_pr_event_handle(pp);
2419                         }
2420                 }
2421
2422                 /*
2423                  * reinstate this path
2424                  */
2425                 if (!disable_reinstate)
2426                         reinstate_path(pp);
2427                 new_path_up = 1;
2428
2429                 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
2430                         chkr_new_path_up = 1;
2431
2432                 /*
2433                  * if at least one path is up in a group, and
2434                  * the group is disabled, re-enable it
2435                  */
2436                 if (newstate == PATH_UP)
2437                         enable_group(pp);
2438         }
2439         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
2440                 if ((pp->dmstate == PSTATE_FAILED ||
2441                     pp->dmstate == PSTATE_UNDEF) &&
2442                     !disable_reinstate)
2443                         /* Clear IO errors */
2444                         reinstate_path(pp);
2445                 else {
2446                         LOG_MSG(4, pp);
2447                         if (pp->checkint != max_checkint) {
2448                                 /*
2449                                  * double the next check delay.
2450                                  * max at conf->max_checkint
2451                                  */
2452                                 if (pp->checkint < (max_checkint / 2))
2453                                         pp->checkint = 2 * pp->checkint;
2454                                 else
2455                                         pp->checkint = max_checkint;
2456
2457                                 condlog(4, "%s: delay next check %is",
2458                                         pp->dev_t, pp->checkint);
2459                         }
2460                         pp->tick = pp->checkint;
2461                 }
2462         }
2463         else if (newstate != PATH_UP && newstate != PATH_GHOST) {
2464                 if (pp->dmstate == PSTATE_ACTIVE ||
2465                     pp->dmstate == PSTATE_UNDEF)
2466                         fail_path(pp, 0);
2467                 if (newstate == PATH_DOWN) {
2468                         int log_checker_err;
2469
2470                         conf = get_multipath_config();
2471                         log_checker_err = conf->log_checker_err;
2472                         put_multipath_config(conf);
2473                         if (log_checker_err == LOG_CHKR_ERR_ONCE)
2474                                 LOG_MSG(3, pp);
2475                         else
2476                                 LOG_MSG(2, pp);
2477                 }
2478         }
2479
2480         pp->state = newstate;
2481
2482         if (pp->mpp->wait_for_udev)
2483                 return 1;
2484         /*
2485          * path prio refreshing
2486          */
2487         condlog(4, "path prio refresh");
2488
2489         if (marginal_changed)
2490                 reload_and_sync_map(pp->mpp, vecs, 1);
2491         else if (update_prio(pp, new_path_up) &&
2492             (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
2493              pp->mpp->pgfailback == -FAILBACK_IMMEDIATE) {
2494                 condlog(2, "%s: path priorities changed. reloading",
2495                         pp->mpp->alias);
2496                 reload_and_sync_map(pp->mpp, vecs, !new_path_up);
2497         } else if (need_switch_pathgroup(pp->mpp, 0)) {
2498                 if (pp->mpp->pgfailback > 0 &&
2499                     (new_path_up || pp->mpp->failback_tick <= 0))
2500                         pp->mpp->failback_tick =
2501                                 pp->mpp->pgfailback + 1;
2502                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
2503                          (chkr_new_path_up && followover_should_failback(pp)))
2504                         switch_pathgroup(pp->mpp);
2505         }
2506         return 1;
2507 }
2508
/*
 * Main path-checker thread.
 *
 * Repeatedly walks the global path vector, runs check_path() on every
 * path, drives the periodic per-map tick handlers and the map garbage
 * collector, then sleeps for (roughly) one second.  Exits when the
 * daemon transitions to shutdown state.  Returns NULL.
 */
static void *
checkerloop (void *ap)
{
	struct vectors *vecs;
	struct path *pp;
	int count = 0;
	unsigned int i;
	struct timespec last_time;
	struct config *conf;
	int foreign_tick = 0;
#ifdef USE_SYSTEMD
	bool use_watchdog;
#endif

	pthread_cleanup_push(rcu_unregister, NULL);
	rcu_register_thread();
	mlockall(MCL_CURRENT | MCL_FUTURE);
	vecs = (struct vectors *)ap;
	condlog(2, "path checkers start up");

	/* Tweak start time for initial path check */
	get_monotonic_time(&last_time);
	last_time.tv_sec -= 1;

	/* use_watchdog is set from process environment and never changes */
	conf = get_multipath_config();
#ifdef USE_SYSTEMD
	use_watchdog = conf->use_watchdog;
#endif
	put_multipath_config(conf);

	while (1) {
		struct timespec diff_time, start_time, end_time;
		int num_paths = 0, strict_timing, rc = 0;
		unsigned int ticks = 0;

		if (set_config_state(DAEMON_RUNNING) != DAEMON_RUNNING)
			/* daemon shutdown */
			break;

		/*
		 * Derive the number of "ticks" (whole seconds) that elapsed
		 * since the previous iteration; check_path() uses this to
		 * age its per-path countdown timers.
		 */
		get_monotonic_time(&start_time);
		if (start_time.tv_sec && last_time.tv_sec) {
			timespecsub(&start_time, &last_time, &diff_time);
			condlog(4, "tick (%ld.%06lu secs)",
				(long)diff_time.tv_sec, diff_time.tv_nsec / 1000);
			last_time = start_time;
			ticks = diff_time.tv_sec;
		} else {
			ticks = 1;
			condlog(4, "tick (%d ticks)", ticks);
		}
#ifdef USE_SYSTEMD
		if (use_watchdog)
			sd_notify(0, "WATCHDOG=1");
#endif

		/*
		 * Path checking proper.  The vecs lock is held while
		 * iterating; check_path() < 0 means the path is broken
		 * beyond repair and is dropped from the vector here.
		 */
		pthread_cleanup_push(cleanup_lock, &vecs->lock);
		lock(&vecs->lock);
		pthread_testcancel();
		vector_foreach_slot (vecs->pathvec, pp, i) {
			rc = check_path(vecs, pp, ticks);
			if (rc < 0) {
				condlog(1, "%s: check_path() failed, removing",
					pp->dev);
				vector_del_slot(vecs->pathvec, i);
				free_path(pp);
				i--;
			} else
				num_paths += rc;
		}
		lock_cleanup_pop(vecs->lock);

		/* Per-second housekeeping for maps and paths. */
		pthread_cleanup_push(cleanup_lock, &vecs->lock);
		lock(&vecs->lock);
		pthread_testcancel();
		defered_failback_tick(vecs->mpvec);
		retry_count_tick(vecs->mpvec);
		missing_uev_wait_tick(vecs);
		ghost_delay_tick(vecs);
		partial_retrigger_tick(vecs->pathvec);
		lock_cleanup_pop(vecs->lock);

		/* Run the map garbage collector every MAPGCINT iterations. */
		if (count)
			count--;
		else {
			pthread_cleanup_push(cleanup_lock, &vecs->lock);
			lock(&vecs->lock);
			pthread_testcancel();
			condlog(4, "map garbage collection");
			mpvec_garbage_collector(vecs);
			count = MAPGCINT;
			lock_cleanup_pop(vecs->lock);
		}

		/* Warn if a full checker pass exceeded max_checkint. */
		diff_time.tv_nsec = 0;
		if (start_time.tv_sec) {
			get_monotonic_time(&end_time);
			timespecsub(&end_time, &start_time, &diff_time);
			if (num_paths) {
				unsigned int max_checkint;

				condlog(4, "checked %d path%s in %ld.%06lu secs",
					num_paths, num_paths > 1 ? "s" : "",
					(long)diff_time.tv_sec,
					diff_time.tv_nsec / 1000);
				conf = get_multipath_config();
				max_checkint = conf->max_checkint;
				put_multipath_config(conf);
				if (diff_time.tv_sec > (time_t)max_checkint)
					condlog(1, "path checkers took longer "
						"than %ld seconds, consider "
						"increasing max_polling_interval",
						(long)diff_time.tv_sec);
			}
		}

		/* Poll foreign (non-dm-multipath) libraries every max_checkint. */
		if (foreign_tick == 0) {
			conf = get_multipath_config();
			foreign_tick = conf->max_checkint;
			put_multipath_config(conf);
		}
		if (--foreign_tick == 0)
			check_foreign();

		post_config_state(DAEMON_IDLE);
		conf = get_multipath_config();
		strict_timing = conf->strict_timing;
		put_multipath_config(conf);
		if (!strict_timing)
			sleep(1);
		else {
			/*
			 * strict_timing: sleep just long enough to keep
			 * iterations aligned on one-second boundaries.
			 */
			if (diff_time.tv_nsec) {
				diff_time.tv_sec = 0;
				diff_time.tv_nsec =
				     1000UL * 1000 * 1000 - diff_time.tv_nsec;
			} else
				diff_time.tv_sec = 1;

			condlog(3, "waiting for %ld.%06lu secs",
				(long)diff_time.tv_sec,
				diff_time.tv_nsec / 1000);
			if (nanosleep(&diff_time, NULL) != 0) {
				condlog(3, "nanosleep failed with error %d",
					errno);
				conf = get_multipath_config();
				conf->strict_timing = 0;
				put_multipath_config(conf);
				break;
			}
		}
	}
	pthread_cleanup_pop(1);
	return NULL;
}
2663
/*
 * Build the daemon's view of paths and maps from scratch.
 *
 * Discovers paths from sysfs and maps from device-mapper, filters
 * blacklisted paths, coalesces paths into maps, removes stale maps,
 * and starts the dm event waiter threads for the resulting map set.
 * vecs->mpvec is replaced by the freshly built vector on success.
 *
 * Returns 0 on success, 1 on failure (vecs->pathvec may already be
 * partially populated in that case).
 */
int
configure (struct vectors * vecs)
{
	struct multipath * mpp;
	struct path * pp;
	vector mpvec;
	int i, ret;
	struct config *conf;

	if (!vecs->pathvec && !(vecs->pathvec = vector_alloc())) {
		condlog(0, "couldn't allocate path vec in configure");
		return 1;
	}

	if (!vecs->mpvec && !(vecs->mpvec = vector_alloc())) {
		condlog(0, "couldn't allocate multipath vec in configure");
		return 1;
	}

	/* New maps are collected here and swapped in at the end. */
	if (!(mpvec = vector_alloc())) {
		condlog(0, "couldn't allocate new maps vec in configure");
		return 1;
	}

	/*
	 * probe for current path (from sysfs) and map (from dm) sets
	 */
	ret = path_discovery(vecs->pathvec, DI_ALL);
	if (ret < 0) {
		condlog(0, "configure failed at path discovery");
		goto fail;
	}

	if (should_exit())
		goto fail;

	/* Drop paths excluded by the (possibly updated) blacklist. */
	conf = get_multipath_config();
	pthread_cleanup_push(put_multipath_config, conf);
	vector_foreach_slot (vecs->pathvec, pp, i){
		if (filter_path(conf, pp) > 0){
			vector_del_slot(vecs->pathvec, i);
			free_path(pp);
			i--;
		}
	}
	pthread_cleanup_pop(1);

	if (map_discovery(vecs)) {
		condlog(0, "configure failed at map discovery");
		goto fail;
	}

	if (should_exit())
		goto fail;

	ret = coalesce_paths(vecs, mpvec, NULL, reload_type, CMD_NONE);
	if (ret != CP_OK) {
		condlog(0, "configure failed while coalescing paths");
		goto fail;
	}

	if (should_exit())
		goto fail;

	/*
	 * may need to remove some maps which are no longer relevant
	 * e.g., due to blacklist changes in conf file
	 */
	if (coalesce_maps(vecs, mpvec)) {
		condlog(0, "configure failed while coalescing maps");
		goto fail;
	}

	if (should_exit())
		goto fail;

	sync_maps_state(mpvec);
	vector_foreach_slot(mpvec, mpp, i){
		/* Newly seen WWIDs get a udev change event to settle links. */
		if (remember_wwid(mpp->wwid) == 1)
			trigger_paths_udev_change(mpp, true);
		update_map_pr(mpp);
	}

	/*
	 * purge dm of old maps and save new set of maps formed by
	 * considering current path state
	 */
	remove_maps(vecs);
	vecs->mpvec = mpvec;

	/*
	 * start dm event waiter threads for these new maps
	 */
	vector_foreach_slot(vecs->mpvec, mpp, i) {
		if (wait_for_events(mpp, vecs)) {
			/* remove_map() deletes the current slot: re-test i. */
			remove_map(mpp, vecs->pathvec, vecs->mpvec);
			i--;
			continue;
		}
		if (setup_multipath(vecs, mpp))
			i--;
	}
	return 0;

fail:
	vector_free(mpvec);
	return 1;
}
2772
2773 int
2774 need_to_delay_reconfig(struct vectors * vecs)
2775 {
2776         struct multipath *mpp;
2777         int i;
2778
2779         if (!VECTOR_SIZE(vecs->mpvec))
2780                 return 0;
2781
2782         vector_foreach_slot(vecs->mpvec, mpp, i) {
2783                 if (mpp->wait_for_udev)
2784                         return 1;
2785         }
2786         return 0;
2787 }
2788
2789 void rcu_free_config(struct rcu_head *head)
2790 {
2791         struct config *conf = container_of(head, struct config, rcu);
2792
2793         free_config(conf);
2794 }
2795
2796 int
2797 reconfigure (struct vectors * vecs)
2798 {
2799         struct config * old, *conf;
2800
2801         conf = load_config(DEFAULT_CONFIGFILE);
2802         if (!conf)
2803                 return 1;
2804
2805         if (verbosity)
2806                 libmp_verbosity = verbosity;
2807         setlogmask(LOG_UPTO(libmp_verbosity + 3));
2808
2809         /*
2810          * free old map and path vectors ... they use old conf state
2811          */
2812         if (VECTOR_SIZE(vecs->mpvec))
2813                 remove_maps_and_stop_waiters(vecs);
2814
2815         free_pathvec(vecs->pathvec, FREE_PATHS);
2816         vecs->pathvec = NULL;
2817         delete_all_foreign();
2818
2819         reset_checker_classes();
2820         if (bindings_read_only)
2821                 conf->bindings_read_only = bindings_read_only;
2822
2823         if (check_alias_settings(conf))
2824                 return 1;
2825
2826         uxsock_timeout = conf->uxsock_timeout;
2827
2828         old = rcu_dereference(multipath_conf);
2829         conf->sequence_nr = old->sequence_nr + 1;
2830         rcu_assign_pointer(multipath_conf, conf);
2831         call_rcu(&old->rcu, rcu_free_config);
2832
2833         configure(vecs);
2834
2835
2836         return 0;
2837 }
2838
2839 static struct vectors *
2840 init_vecs (void)
2841 {
2842         struct vectors * vecs;
2843
2844         vecs = (struct vectors *)calloc(1, sizeof(struct vectors));
2845
2846         if (!vecs)
2847                 return NULL;
2848
2849         pthread_mutex_init(&vecs->lock.mutex, NULL);
2850
2851         return vecs;
2852 }
2853
2854 static void *
2855 signal_set(int signo, void (*func) (int))
2856 {
2857         int r;
2858         struct sigaction sig;
2859         struct sigaction osig;
2860
2861         sig.sa_handler = func;
2862         sigemptyset(&sig.sa_mask);
2863         sig.sa_flags = 0;
2864
2865         r = sigaction(signo, &sig, &osig);
2866
2867         if (r < 0)
2868                 return (SIG_ERR);
2869         else
2870                 return (osig.sa_handler);
2871 }
2872
/*
 * Act on flags raised by the async signal handlers below.
 *
 * Runs in normal thread context, so it may safely log, allocate and
 * take locks.  When nonfatal is false only the exit request is
 * processed; reconfigure/log-reset requests are left pending.
 */
void
handle_signals(bool nonfatal)
{
	if (exit_sig) {
		condlog(2, "exit (signal)");
		exit_sig = 0;
		exit_daemon();
	}
	if (!nonfatal)
		return;
	if (reconfig_sig) {
		condlog(2, "reconfigure (signal)");
		schedule_reconfigure(FORCE_RELOAD_WEAK);
	}
	if (log_reset_sig) {
		condlog(2, "reset log (signal)");
		if (logsink == LOGSINK_SYSLOG)
			log_thread_reset();
	}
	/* Both flags are cleared unconditionally once serviced. */
	reconfig_sig = 0;
	log_reset_sig = 0;
}
2895
/* SIGHUP handler: request a reconfiguration (serviced in handle_signals()). */
static void
sighup(__attribute__((unused)) int sig)
{
	reconfig_sig = 1;
}
2901
/* SIGINT/SIGTERM/SIGPIPE handler: request daemon shutdown. */
static void
sigend(__attribute__((unused)) int sig)
{
	exit_sig = 1;
}
2907
/* SIGUSR1 handler: request a log reset (serviced in handle_signals()). */
static void
sigusr1(__attribute__((unused)) int sig)
{
	log_reset_sig = 1;
}
2913
/*
 * SIGUSR2 handler: only logs receipt.
 * NOTE(review): condlog() is not async-signal-safe; presumably this is
 * accepted here as a debug-only aid — confirm before relying on it.
 */
static void
sigusr2(__attribute__((unused)) int sig)
{
	condlog(3, "SIGUSR2 received");
}
2919
2920 static void
2921 signal_init(void)
2922 {
2923         sigset_t set;
2924
2925         /* block all signals */
2926         sigfillset(&set);
2927         /* SIGPIPE occurs if logging fails */
2928         sigdelset(&set, SIGPIPE);
2929         pthread_sigmask(SIG_SETMASK, &set, NULL);
2930
2931         /* Other signals will be unblocked in the uxlsnr thread */
2932         signal_set(SIGHUP, sighup);
2933         signal_set(SIGUSR1, sigusr1);
2934         signal_set(SIGUSR2, sigusr2);
2935         signal_set(SIGINT, sigend);
2936         signal_set(SIGTERM, sigend);
2937         signal_set(SIGPIPE, sigend);
2938 }
2939
2940 static void
2941 setscheduler (void)
2942 {
2943         int res;
2944         static struct sched_param sched_param = {
2945                 .sched_priority = 99
2946         };
2947
2948         res = sched_setscheduler (0, SCHED_RR, &sched_param);
2949
2950         if (res == -1)
2951                 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
2952         return;
2953 }
2954
2955 static void set_oom_adj(void)
2956 {
2957         FILE *fp;
2958
2959         if (getenv("OOMScoreAdjust")) {
2960                 condlog(3, "Using systemd provided OOMScoreAdjust");
2961                 return;
2962         }
2963 #ifdef OOM_SCORE_ADJ_MIN
2964         fp = fopen("/proc/self/oom_score_adj", "w");
2965         if (fp) {
2966                 fprintf(fp, "%i", OOM_SCORE_ADJ_MIN);
2967                 fclose(fp);
2968                 return;
2969         }
2970 #endif
2971         fp = fopen("/proc/self/oom_adj", "w");
2972         if (fp) {
2973                 fprintf(fp, "%i", OOM_ADJUST_MIN);
2974                 fclose(fp);
2975                 return;
2976         }
2977         condlog(0, "couldn't adjust oom score");
2978 }
2979
/* Drop the pidfile lock (closing its fd) and remove the pidfile itself. */
static void cleanup_pidfile(void)
{
	if (pid_fd >= 0)
		close(pid_fd);
	condlog(3, "unlink pidfile");
	unlink(DEFAULT_PIDFILE);
}
2987
2988 static void cleanup_conf(void) {
2989         struct config *conf;
2990
2991         conf = rcu_dereference(multipath_conf);
2992         if (!conf)
2993                 return;
2994         rcu_assign_pointer(multipath_conf, NULL);
2995         call_rcu(&conf->rcu, rcu_free_config);
2996 }
2997
2998 static void cleanup_maps(struct vectors *vecs)
2999 {
3000         int queue_without_daemon, i;
3001         struct multipath *mpp;
3002         struct config *conf;
3003
3004         conf = get_multipath_config();
3005         queue_without_daemon = conf->queue_without_daemon;
3006         put_multipath_config(conf);
3007         if (queue_without_daemon == QUE_NO_DAEMON_OFF)
3008                 vector_foreach_slot(vecs->mpvec, mpp, i)
3009                         dm_queue_if_no_path(mpp->alias, 0);
3010         remove_maps_and_stop_waiters(vecs);
3011         vecs->mpvec = NULL;
3012 }
3013
/* Free every discovered path (and the path vector itself) at exit. */
static void cleanup_paths(struct vectors *vecs)
{
	free_pathvec(vecs->pathvec, FREE_PATHS);
	vecs->pathvec = NULL;
}
3019
/* Final teardown of the global vectors container (atexit path). */
static void cleanup_vecs(void)
{
	if (!gvecs)
		return;
	/*
	 * We can't take the vecs lock here, because exit() may
	 * have been called from the child() thread, holding the lock already.
	 * Anyway, by the time we get here, all threads that might access
	 * vecs should have been joined already (in cleanup_threads).
	 */
	cleanup_maps(gvecs);
	cleanup_paths(gvecs);
	pthread_mutex_destroy(&gvecs->lock.mutex);
	free(gvecs);
	gvecs = NULL;
}
3036
/*
 * Stop all daemon worker threads.
 *
 * Every started thread is cancelled first, and only then joined, so
 * the cancellations can proceed in parallel instead of serializing
 * one cancel+join pair at a time.
 */
static void cleanup_threads(void)
{
	stop_io_err_stat_thread();

	if (check_thr_started)
		pthread_cancel(check_thr);
	if (uevent_thr_started)
		pthread_cancel(uevent_thr);
	if (uxlsnr_thr_started)
		pthread_cancel(uxlsnr_thr);
	if (uevq_thr_started)
		pthread_cancel(uevq_thr);
	if (dmevent_thr_started)
		pthread_cancel(dmevent_thr);

	if (check_thr_started)
		pthread_join(check_thr, NULL);
	if (uevent_thr_started)
		pthread_join(uevent_thr, NULL);
	if (uxlsnr_thr_started)
		pthread_join(uxlsnr_thr, NULL);
	if (uevq_thr_started)
		pthread_join(uevq_thr, NULL);
	if (dmevent_thr_started)
		pthread_join(dmevent_thr, NULL);

	/*
	 * As all threads are joined now, and we're in DAEMON_SHUTDOWN
	 * state, no new waiter threads will be created any more.
	 */
	pthread_attr_destroy(&waiter_attr);
}
3069
3070 #ifndef URCU_VERSION
3071 #  define URCU_VERSION 0
3072 #endif
3073 #if (URCU_VERSION >= 0x000800)
3074 /*
3075  * Use a non-default call_rcu_data for child().
3076  *
3077  * We do this to avoid a memory leak from liburcu.
3078  * liburcu never frees the default rcu handler (see comments on
3079  * call_rcu_data_free() in urcu-call-rcu-impl.h), its thread
3080  * can't be joined with pthread_join(), leaving a memory leak.
3081  *
3082  * Therefore we create our own, which can be destroyed and joined.
3083  * The cleanup handler needs to call rcu_barrier(), which is only
3084  * available in user-space RCU v0.8 and newer. See
3085  * https://lists.lttng.org/pipermail/lttng-dev/2021-May/029958.html
3086  */
3087 static struct call_rcu_data *setup_rcu(void)
3088 {
3089         struct call_rcu_data *crdp;
3090
3091         rcu_init();
3092         rcu_register_thread();
3093         crdp = create_call_rcu_data(0UL, -1);
3094         if (crdp != NULL)
3095                 set_thread_call_rcu_data(crdp);
3096         return crdp;
3097 }
3098
/* Dedicated call_rcu worker; created in setup_rcu(), torn down in cleanup_rcu(). */
static struct call_rcu_data *mp_rcu_data;
3100
/*
 * Shut down our dedicated call_rcu worker at exit: flush all pending
 * RCU callbacks, detach this thread from the worker, free the worker
 * data (which makes its thread exit) and join that thread, then
 * unregister this thread from RCU.  Order matters throughout.
 */
static void cleanup_rcu(void)
{
	pthread_t rcu_thread;

	/* Wait for any pending RCU calls */
	rcu_barrier();
	if (mp_rcu_data != NULL) {
		rcu_thread = get_call_rcu_thread(mp_rcu_data);
		/* detach this thread from the RCU thread */
		set_thread_call_rcu_data(NULL);
		synchronize_rcu();
		/* tell RCU thread to exit */
		call_rcu_data_free(mp_rcu_data);
		pthread_join(rcu_thread, NULL);
	}
	rcu_unregister_thread();
}
3118 #endif /* URCU_VERSION */
3119
/*
 * atexit() handler for child(): stop all threads first (so nothing
 * touches the vectors any more), then free the vectors, the dmevents
 * waiter state, the pidfile, the log thread and finally the config.
 */
static void cleanup_child(void)
{
	cleanup_threads();
	cleanup_vecs();
	if (poll_dmevents)
		cleanup_dmevent_waiter();

	cleanup_pidfile();
	if (logsink == LOGSINK_SYSLOG)
		log_thread_stop();

	cleanup_conf();
}
3133
/*
 * Report the daemon's exit status to systemd (as an ERRNO= status
 * field) when built with systemd support, and pass the status back
 * unchanged so it can be used as the return value of child().
 */
static int sd_notify_exit(int exit_status)
{
#ifdef USE_SYSTEMD
	char status_msg[24];

	snprintf(status_msg, sizeof(status_msg), "ERRNO=%d", exit_status);
	sd_notify(0, status_msg);
#endif
	return exit_status;
}
3144
/*
 * Daemon main routine (runs in the forked child when daemonized).
 *
 * Initializes logging, RCU, the pidfile, the configuration and the
 * path/map vectors, starts the worker threads (cli listener, dmevent
 * waiter, uevent listener/dispatcher, path checker), then loops
 * servicing DAEMON_CONFIGURE requests until shutdown.  All teardown
 * happens in the cleanup_child() atexit handler.  Returns the exit
 * code (0 on clean shutdown, 1 on startup failure).
 */
static int
child (__attribute__((unused)) void *param)
{
	pthread_attr_t log_attr, misc_attr, uevent_attr;
	struct vectors * vecs;
	int rc;
	struct config *conf;
	char *envp;
	enum daemon_status state;
	int exit_code = 1;

	init_unwinder();
	mlockall(MCL_CURRENT | MCL_FUTURE);
	signal_init();
#if (URCU_VERSION >= 0x000800)
	/* Use a joinable call_rcu worker to avoid liburcu's exit leak. */
	mp_rcu_data = setup_rcu();
	if (atexit(cleanup_rcu))
		fprintf(stderr, "failed to register RCU cleanup handler\n");
#else
	rcu_init();
#endif
	if (atexit(cleanup_child))
		fprintf(stderr, "failed to register cleanup handlers\n");

	setup_thread_attr(&misc_attr, 64 * 1024, 0);
	setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0);
	setup_thread_attr(&waiter_attr, 32 * 1024, 1);

	if (logsink == LOGSINK_SYSLOG) {
		setup_thread_attr(&log_attr, 64 * 1024, 0);
		log_thread_start(&log_attr);
		pthread_attr_destroy(&log_attr);
	}
	pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
	if (pid_fd < 0) {
		condlog(1, "failed to create pidfile");
		exit(1);
	}

	post_config_state(DAEMON_START);

	condlog(2, "--------start up--------");
	condlog(2, "read " DEFAULT_CONFIGFILE);

	/* -v on the command line overrides the configured verbosity,
	 * both before and after the config file has been parsed. */
	if (verbosity)
		libmp_verbosity = verbosity;
	conf = load_config(DEFAULT_CONFIGFILE);
	if (verbosity)
		libmp_verbosity = verbosity;
	setlogmask(LOG_UPTO(libmp_verbosity + 3));

	if (!conf) {
		condlog(0, "failed to load configuration");
		goto failed;
	}

	if (bindings_read_only)
		conf->bindings_read_only = bindings_read_only;
	uxsock_timeout = conf->uxsock_timeout;
	rcu_assign_pointer(multipath_conf, conf);
	if (init_checkers(conf->multipath_dir)) {
		condlog(0, "failed to initialize checkers");
		goto failed;
	}
	if (init_prio(conf->multipath_dir)) {
		condlog(0, "failed to initialize prioritizers");
		goto failed;
	}
	/* Failing this is non-fatal */

	init_foreign(conf->multipath_dir, conf->enable_foreign);

	if (poll_dmevents)
		poll_dmevents = dmevent_poll_supported();

	envp = getenv("LimitNOFILE");

	if (envp)
		condlog(2,"Using systemd provided open fds limit of %s", envp);
	else
		set_max_fds(conf->max_fds);

	vecs = gvecs = init_vecs();
	if (!vecs)
		goto failed;

	setscheduler();
	set_oom_adj();

	/*
	 * Startup done, invalidate configuration
	 */
	conf = NULL;

	/* Start the cli listener and wait (under config_lock) until it
	 * signals a state change away from DAEMON_IDLE. */
	pthread_cleanup_push(config_cleanup, NULL);
	pthread_mutex_lock(&config_lock);

	__post_config_state(DAEMON_IDLE);
	rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
	if (!rc) {
		/* Wait for uxlsnr startup */
		while (running_state == DAEMON_IDLE)
			pthread_cond_wait(&config_cond, &config_lock);
		state = running_state;
	}
	pthread_cleanup_pop(1);

	if (rc) {
		condlog(0, "failed to create cli listener: %d", rc);
		goto failed;
	}
	else {
		uxlsnr_thr_started = true;
		if (state != DAEMON_CONFIGURE) {
			condlog(0, "cli listener failed to start");
			goto failed;
		}
	}

	if (poll_dmevents) {
		if (init_dmevent_waiter(vecs)) {
			condlog(0, "failed to allocate dmevents waiter info");
			goto failed;
		}
		if ((rc = pthread_create(&dmevent_thr, &misc_attr,
					 wait_dmevents, NULL))) {
			condlog(0, "failed to create dmevent waiter thread: %d",
				rc);
			goto failed;
		} else
			dmevent_thr_started = true;
	}

	/*
	 * Start uevent listener early to catch events
	 */
	if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
		condlog(0, "failed to create uevent thread: %d", rc);
		goto failed;
	} else
		uevent_thr_started = true;
	pthread_attr_destroy(&uevent_attr);

	/*
	 * start threads
	 */
	if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
		condlog(0,"failed to create checker loop thread: %d", rc);
		goto failed;
	} else
		check_thr_started = true;
	if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
		condlog(0, "failed to create uevent dispatcher: %d", rc);
		goto failed;
	} else
		uevq_thr_started = true;
	pthread_attr_destroy(&misc_attr);

	/* Main loop: wait for CONFIGURE or SHUTDOWN state transitions. */
	while (1) {
		pthread_cleanup_push(config_cleanup, NULL);
		pthread_mutex_lock(&config_lock);
		while (running_state != DAEMON_CONFIGURE &&
		       running_state != DAEMON_SHUTDOWN)
			pthread_cond_wait(&config_cond, &config_lock);
		state = running_state;
		pthread_cleanup_pop(1);
		if (state == DAEMON_SHUTDOWN)
			break;
		if (state == DAEMON_CONFIGURE) {
			/* NOTE: this rc intentionally shadows the outer one. */
			int rc = 0;

			pthread_cleanup_push(cleanup_lock, &vecs->lock);
			lock(&vecs->lock);
			pthread_testcancel();
			if (!need_to_delay_reconfig(vecs))
				rc = reconfigure(vecs);
			else
				enable_delayed_reconfig();
			lock_cleanup_pop(vecs->lock);
			if (!rc)
				post_config_state(DAEMON_IDLE);
			else {
				condlog(0, "fatal error applying configuration - aborting");
				exit_daemon();
			}
		}
	}

	exit_code = 0;
failed:
	condlog(2, "--------shut down-------");
	/* All cleanup is done in the cleanup_child() exit handler */
	return sd_notify_exit(exit_code);
}
3339
/*
 * Cleanup helper for __attribute__((cleanup)): close the descriptor
 * *pfd unless it is -1 (never opened) or one of the standard streams.
 */
static void cleanup_close(int *pfd)
{
	int fd = *pfd;

	switch (fd) {
	case -1:
	case STDIN_FILENO:
	case STDOUT_FILENO:
	case STDERR_FILENO:
		return;
	default:
		close(fd);
	}
}
3346
/*
 * Detach from the controlling terminal using the classic double-fork
 * technique and redirect the standard streams to /dev/null.
 *
 * Returns 0 in the daemonized grandchild, the (positive) pid of the
 * first child in the original parent (which should then exit), or -1
 * if the first fork fails.  dev_null_fd is closed automatically by the
 * cleanup attribute when this function returns.
 */
static int
daemonize(void)
{
	int pid;
	int dev_null_fd __attribute__((cleanup(cleanup_close))) = -1;

	if( (pid = fork()) < 0){
		fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
		return -1;
	}
	else if (pid != 0)
		/* parent: return the child's pid so the caller can exit */
		return pid;

	/* first child: become session leader, dropping the controlling tty */
	setsid();

	/*
	 * Second fork ensures the daemon is not a session leader and thus
	 * can never reacquire a controlling terminal.
	 * NOTE(review): if this fork fails we only log and continue as the
	 * session leader - presumably acceptable, but worth confirming.
	 */
	if ( (pid = fork()) < 0)
		fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
	else if (pid != 0)
		/* first child exits; grandchild continues as the daemon */
		_exit(0);

	/* don't keep any mount point busy */
	if (chdir("/") < 0)
		fprintf(stderr, "cannot chdir to '/', continuing\n");

	dev_null_fd = open("/dev/null", O_RDWR);
	if (dev_null_fd < 0){
		fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
			strerror(errno));
		/* status 0: the parent has already reported success */
		_exit(0);
	}

	/* detach stdio from the terminal */
	if (dup2(dev_null_fd, STDIN_FILENO) < 0) {
		fprintf(stderr, "cannot dup2 /dev/null to stdin : %s\n",
			strerror(errno));
		_exit(0);
	}
	if (dup2(dev_null_fd, STDOUT_FILENO) < 0) {
		fprintf(stderr, "cannot dup2 /dev/null to stdout : %s\n",
			strerror(errno));
		_exit(0);
	}
	if (dup2(dev_null_fd, STDERR_FILENO) < 0) {
		fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
			strerror(errno));
		_exit(0);
	}
	daemon_pid = getpid();
	return 0;
}
3395
3396 int
3397 main (int argc, char *argv[])
3398 {
3399         extern char *optarg;
3400         extern int optind;
3401         int arg;
3402         int err;
3403         int foreground = 0;
3404         struct config *conf;
3405         char *opt_k_arg = NULL;
3406         bool opt_k = false;
3407
3408         ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf),
3409                                    "Manipulated through RCU");
3410         ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout),
3411                 "Suppress complaints about this scalar variable");
3412
3413         logsink = LOGSINK_SYSLOG;
3414
3415         /* make sure we don't lock any path */
3416         if (chdir("/") < 0)
3417                 fprintf(stderr, "can't chdir to root directory : %s\n",
3418                         strerror(errno));
3419         umask(umask(077) | 022);
3420
3421         pthread_cond_init_mono(&config_cond);
3422
3423         if (atexit(dm_lib_exit))
3424                 condlog(3, "failed to register exit handler for libdm");
3425
3426         libmultipath_init();
3427         if (atexit(libmultipath_exit))
3428                 condlog(3, "failed to register exit handler for libmultipath");
3429         libmp_udev_set_sync_support(0);
3430
3431         while ((arg = getopt(argc, argv, ":dsv:k::Bniw")) != EOF ) {
3432                 switch(arg) {
3433                 case 'd':
3434                         foreground = 1;
3435                         if (logsink == LOGSINK_SYSLOG)
3436                                 logsink = LOGSINK_STDERR_WITH_TIME;
3437                         break;
3438                 case 'v':
3439                         if (sizeof(optarg) > sizeof(char *) ||
3440                             !isdigit(optarg[0]))
3441                                 exit(1);
3442
3443                         libmp_verbosity = verbosity = atoi(optarg);
3444                         break;
3445                 case 's':
3446                         logsink = LOGSINK_STDERR_WITHOUT_TIME;
3447                         break;
3448                 case 'k':
3449                         opt_k = true;
3450                         opt_k_arg = optarg;
3451                         break;
3452                 case 'B':
3453                         bindings_read_only = 1;
3454                         break;
3455                 case 'n':
3456                         condlog(0, "WARNING: ignoring deprecated option -n, use 'ignore_wwids = no' instead");
3457                         break;
3458                 case 'w':
3459                         poll_dmevents = 0;
3460                         break;
3461                 default:
3462                         fprintf(stderr, "Invalid argument '-%c'\n",
3463                                 optopt);
3464                         exit(1);
3465                 }
3466         }
3467         if (opt_k || optind < argc) {
3468                 char cmd[CMDSIZE];
3469                 char * s = cmd;
3470                 char * c = s;
3471
3472                 logsink = LOGSINK_STDERR_WITH_TIME;
3473                 if (verbosity)
3474                         libmp_verbosity = verbosity;
3475                 conf = load_config(DEFAULT_CONFIGFILE);
3476                 if (!conf)
3477                         exit(1);
3478                 if (verbosity)
3479                         libmp_verbosity = verbosity;
3480                 uxsock_timeout = conf->uxsock_timeout;
3481                 memset(cmd, 0x0, CMDSIZE);
3482                 if (opt_k)
3483                         s = opt_k_arg;
3484                 else {
3485                         while (optind < argc) {
3486                                 if (strchr(argv[optind], ' '))
3487                                         c += snprintf(c, s + CMDSIZE - c,
3488                                                       "\"%s\" ", argv[optind]);
3489                                 else
3490                                         c += snprintf(c, s + CMDSIZE - c,
3491                                                       "%s ", argv[optind]);
3492                                 optind++;
3493                         }
3494                         c += snprintf(c, s + CMDSIZE - c, "\n");
3495                 }
3496                 err = uxclnt(s, uxsock_timeout + 100);
3497                 free_config(conf);
3498                 return err;
3499         }
3500
3501         if (getuid() != 0) {
3502                 fprintf(stderr, "need to be root\n");
3503                 exit(1);
3504         }
3505
3506         if (foreground) {
3507                 if (!isatty(fileno(stdout)))
3508                         setbuf(stdout, NULL);
3509                 err = 0;
3510                 daemon_pid = getpid();
3511         } else
3512                 err = daemonize();
3513
3514         if (err < 0)
3515                 /* error */
3516                 exit(1);
3517         else if (err > 0)
3518                 /* parent dies */
3519                 exit(0);
3520         else
3521                 /* child lives */
3522                 return (child(NULL));
3523 }
3524
3525 void *  mpath_pr_event_handler_fn (void * pathp )
3526 {
3527         struct multipath * mpp;
3528         unsigned int i;
3529         int ret, isFound;
3530         struct path * pp = (struct path *)pathp;
3531         struct prout_param_descriptor *param;
3532         struct prin_resp *resp;
3533
3534         rcu_register_thread();
3535         mpp = pp->mpp;
3536
3537         resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
3538         if (!resp){
3539                 condlog(0,"%s Alloc failed for prin response", pp->dev);
3540                 goto out;
3541         }
3542
3543         ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
3544         if (ret != MPATH_PR_SUCCESS )
3545         {
3546                 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
3547                 goto out;
3548         }
3549
3550         condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
3551                         resp->prin_descriptor.prin_readkeys.additional_length );
3552
3553         if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
3554         {
3555                 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
3556                 goto out;
3557         }
3558         condlog(2, "Multipath  reservation_key: 0x%" PRIx64 " ",
3559                 get_be64(mpp->reservation_key));
3560
3561         isFound =0;
3562         for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
3563         {
3564                 condlog(2, "PR IN READKEYS[%d]  reservation key:",i);
3565                 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
3566                 if (!memcmp(&mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
3567                 {
3568                         condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
3569                         isFound =1;
3570                         break;
3571                 }
3572         }
3573         if (!isFound)
3574         {
3575                 condlog(0, "%s: Either device not registered or ", pp->dev);
3576                 condlog(0, "host is not authorised for registration. Skip path");
3577                 goto out;
3578         }
3579
3580         param = (struct prout_param_descriptor *)calloc(1, sizeof(struct prout_param_descriptor));
3581         if (!param)
3582                 goto out;
3583
3584         param->sa_flags = mpp->sa_flags;
3585         memcpy(param->sa_key, &mpp->reservation_key, 8);
3586         param->num_transportid = 0;
3587
3588         condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
3589
3590         ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
3591         if (ret != MPATH_PR_SUCCESS )
3592         {
3593                 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
3594         }
3595         mpp->prflag = 1;
3596
3597         free(param);
3598 out:
3599         if (resp)
3600                 free(resp);
3601         rcu_unregister_thread();
3602         return NULL;
3603 }
3604
3605 int mpath_pr_event_handle(struct path *pp)
3606 {
3607         pthread_t thread;
3608         int rc;
3609         pthread_attr_t attr;
3610         struct multipath * mpp;
3611
3612         if (pp->bus != SYSFS_BUS_SCSI)
3613                 return 0;
3614
3615         mpp = pp->mpp;
3616
3617         if (!get_be64(mpp->reservation_key))
3618                 return -1;
3619
3620         pthread_attr_init(&attr);
3621         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3622
3623         rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
3624         if (rc) {
3625                 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
3626                 return -1;
3627         }
3628         pthread_attr_destroy(&attr);
3629         rc = pthread_join(thread, NULL);
3630         return 0;
3631 }