Check for empty mpvecs in mpvec_garbage_collector()
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18
19 /*
20  * libcheckers
21  */
22 #include <checkers.h>
23
24 /*
25  * libmultipath
26  */
27 #include <parser.h>
28 #include <vector.h>
29 #include <memory.h>
30 #include <config.h>
31 #include <util.h>
32 #include <hwtable.h>
33 #include <defaults.h>
34 #include <structs.h>
35 #include <callout.h>
36 #include <blacklist.h>
37 #include <structs_vec.h>
38 #include <dmparser.h>
39 #include <devmapper.h>
40 #include <sysfs.h>
41 #include <dict.h>
42 #include <discovery.h>
43 #include <debug.h>
44 #include <propsel.h>
45 #include <uevent.h>
46 #include <switchgroup.h>
47 #include <print.h>
48 #include <configure.h>
49 #include <prio.h>
50
51 #include "main.h"
52 #include "pidfile.h"
53 #include "uxlsnr.h"
54 #include "uxclnt.h"
55 #include "cli.h"
56 #include "cli_handlers.h"
57 #include "lock.h"
58 #include "waiter.h"
59
#define FILE_NAME_SIZE 256
#define CMDSIZE 160

/*
 * Log message b at verbosity a, prefixed with the device name of the
 * path "pp", which must be in scope where the macro is used. Empty
 * messages are skipped. Note that b is evaluated twice.
 *
 * The do { } while (0) wrapper makes the macro expand to exactly one
 * statement, so it can be used as the body of an if/else without
 * capturing the else.
 */
#define LOG_MSG(a,b) \
        do { \
                if (strlen(b)) \
                        condlog(a, "%s: %s", pp->dev, b); \
        } while (0)

/*
 * exit_cond is signalled by exit_daemon() while holding exit_mutex
 */
pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;

int logsink;

/*
 * global copy of vecs for use in sig handlers
 */
struct vectors * gvecs;
75
76 static int
77 need_switch_pathgroup (struct multipath * mpp, int refresh)
78 {
79         struct pathgroup * pgp;
80         struct path * pp;
81         unsigned int i, j;
82
83         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
84                 return 0;
85
86         /*
87          * Refresh path priority values
88          */
89         if (refresh)
90                 vector_foreach_slot (mpp->pg, pgp, i)
91                         vector_foreach_slot (pgp->paths, pp, j)
92                                 pathinfo(pp, conf->hwtable, DI_PRIO);
93
94         mpp->bestpg = select_path_group(mpp);
95
96         if (mpp->bestpg != mpp->nextpg)
97                 return 1;
98
99         return 0;
100 }
101
102 static void
103 switch_pathgroup (struct multipath * mpp)
104 {
105         mpp->stat_switchgroup++;
106         dm_switchgroup(mpp->alias, mpp->bestpg);
107         condlog(2, "%s: switch to path group #%i",
108                  mpp->alias, mpp->bestpg);
109 }
110
111 static int
112 coalesce_maps(struct vectors *vecs, vector nmpv)
113 {
114         struct multipath * ompp;
115         vector ompv = vecs->mpvec;
116         unsigned int i;
117         int j;
118
119         vector_foreach_slot (ompv, ompp, i) {
120                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
121                         /*
122                          * remove all current maps not allowed by the
123                          * current configuration
124                          */
125                         if (dm_flush_map(ompp->alias)) {
126                                 condlog(0, "%s: unable to flush devmap",
127                                         ompp->alias);
128                                 /*
129                                  * may be just because the device is open
130                                  */
131                                 if (!vector_alloc_slot(nmpv))
132                                         return 1;
133
134                                 vector_set_slot(nmpv, ompp);
135                                 setup_multipath(vecs, ompp);
136
137                                 if ((j = find_slot(ompv, (void *)ompp)) != -1)
138                                         vector_del_slot(ompv, j);
139
140                                 continue;
141                         }
142                         else {
143                                 dm_lib_release();
144                                 condlog(2, "%s devmap removed", ompp->alias);
145                         }
146                 }
147         }
148         return 0;
149 }
150
151 void
152 sync_map_state(struct multipath *mpp)
153 {
154         struct pathgroup *pgp;
155         struct path *pp;
156         unsigned int i, j;
157
158         if (!mpp->pg)
159                 return;
160
161         vector_foreach_slot (mpp->pg, pgp, i){
162                 vector_foreach_slot (pgp->paths, pp, j){
163                         if (pp->state == PATH_UNCHECKED || 
164                             pp->state == PATH_WILD)
165                                 continue;
166                         if ((pp->dmstate == PSTATE_FAILED ||
167                              pp->dmstate == PSTATE_UNDEF) &&
168                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
169                                 dm_reinstate_path(mpp->alias, pp->dev_t);
170                         else if ((pp->dmstate == PSTATE_ACTIVE ||
171                                   pp->dmstate == PSTATE_UNDEF) &&
172                                  (pp->state == PATH_DOWN ||
173                                   pp->state == PATH_SHAKY))
174                                 dm_fail_path(mpp->alias, pp->dev_t);
175                 }
176         }
177 }
178
179 static void
180 sync_maps_state(vector mpvec)
181 {
182         unsigned int i;
183         struct multipath *mpp;
184
185         vector_foreach_slot (mpvec, mpp, i)
186                 sync_map_state(mpp);
187 }
188
189 static int
190 flush_map(struct multipath * mpp, struct vectors * vecs)
191 {
192         /*
193          * clear references to this map before flushing so we can ignore
194          * the spurious uevent we may generate with the dm_flush_map call below
195          */
196         if (dm_flush_map(mpp->alias)) {
197                 /*
198                  * May not really be an error -- if the map was already flushed
199                  * from the device mapper by dmsetup(8) for instance.
200                  */
201                 condlog(0, "%s: can't flush", mpp->alias);
202                 return 1;
203         }
204         else {
205                 dm_lib_release();
206                 condlog(2, "%s: devmap removed", mpp->alias);
207         }
208
209         orphan_paths(vecs->pathvec, mpp);
210         remove_map_and_stop_waiter(mpp, vecs, 1);
211
212         return 0;
213 }
214
215 static int
216 uev_add_map (struct sysfs_device * dev, struct vectors * vecs)
217 {
218         condlog(2, "%s: add map (uevent)", dev->kernel);
219         return ev_add_map(dev, vecs);
220 }
221
222 int
223 ev_add_map (struct sysfs_device * dev, struct vectors * vecs)
224 {
225         char * alias;
226         char *dev_t;
227         int major, minor;
228         char * refwwid;
229         struct multipath * mpp;
230         int map_present;
231         int r = 1;
232
233         dev_t = sysfs_attr_get_value(dev->devpath, "dev");
234
235         if (!dev_t || sscanf(dev_t, "%d:%d", &major, &minor) != 2)
236                 return 1;
237
238         alias = dm_mapname(major, minor);
239
240         if (!alias)
241                 return 1;
242
243         map_present = dm_map_present(alias);
244
245         if (map_present && dm_type(alias, TGT_MPATH) <= 0) {
246                 condlog(4, "%s: not a multipath map", alias);
247                 FREE(alias);
248                 return 0;
249         }
250
251         mpp = find_mp_by_alias(vecs->mpvec, alias);
252
253         if (mpp) {
254                 /*
255                  * Not really an error -- we generate our own uevent
256                  * if we create a multipath mapped device as a result
257                  * of uev_add_path
258                  */
259                 condlog(0, "%s: devmap already registered",
260                         dev->kernel);
261                 FREE(alias);
262                 return 0;
263         }
264
265         /*
266          * now we can register the map
267          */
268         if (map_present && (mpp = add_map_without_path(vecs, minor, alias))) {
269                 sync_map_state(mpp);
270                 condlog(2, "%s: devmap %s added", alias, dev->kernel);
271                 return 0;
272         }
273         refwwid = get_refwwid(dev->kernel, DEV_DEVMAP, vecs->pathvec);
274
275         if (refwwid) {
276                 r = coalesce_paths(vecs, NULL, refwwid, 0);
277                 dm_lib_release();
278         }
279
280         if (!r)
281                 condlog(2, "%s: devmap %s added", alias, dev->kernel);
282         else
283                 condlog(0, "%s: uev_add_map %s failed", alias, dev->kernel);
284
285         FREE(refwwid);
286         FREE(alias);
287         return r;
288 }
289
290 static int
291 uev_remove_map (struct sysfs_device * dev, struct vectors * vecs)
292 {
293         condlog(2, "%s: remove map (uevent)", dev->kernel);
294         return ev_remove_map(dev->kernel, vecs);
295 }
296
297 int
298 ev_remove_map (char * devname, struct vectors * vecs)
299 {
300         struct multipath * mpp;
301
302         mpp = find_mp_by_str(vecs->mpvec, devname);
303
304         if (!mpp) {
305                 condlog(2, "%s: devmap not registered, can't remove",
306                         devname);
307                 return 0;
308         }
309         flush_map(mpp, vecs);
310
311         return 0;
312 }
313
314 static int
315 uev_umount_map (struct sysfs_device * dev, struct vectors * vecs)
316 {
317         struct multipath * mpp;
318
319         condlog(2, "%s: umount map (uevent)", dev->kernel);
320
321         mpp = find_mp_by_str(vecs->mpvec, dev->kernel);
322
323         if (!mpp)
324                 return 0;
325
326         update_mpp_paths(mpp, vecs->pathvec);
327         verify_paths(mpp, vecs, NULL);
328
329         if (!VECTOR_SIZE(mpp->paths))
330                 flush_map(mpp, vecs);
331
332         return 0;
333 }
334
335 static int
336 uev_add_path (struct sysfs_device * dev, struct vectors * vecs)
337 {
338         condlog(2, "%s: add path (uevent)", dev->kernel);
339         return (ev_add_path(dev->kernel, vecs) != 1)? 0 : 1;
340 }
341
342
343 /*
344  * returns:
345  * 0: added
346  * 1: error
347  * 2: blacklisted
348  */
349 int
350 ev_add_path (char * devname, struct vectors * vecs)
351 {
352         struct multipath * mpp;
353         struct path * pp;
354         char empty_buff[WWID_SIZE] = {0};
355
356         pp = find_path_by_dev(vecs->pathvec, devname);
357
358         if (pp) {
359                 condlog(0, "%s: spurious uevent, path already in pathvec",
360                         devname);
361                 if (pp->mpp)
362                         return 0;
363         }
364         else {
365                 /*
366                  * get path vital state
367                  */
368                 if (!(pp = store_pathinfo(vecs->pathvec, conf->hwtable,
369                       devname, DI_ALL))) {
370                         condlog(0, "%s: failed to store path info", devname);
371                         return 1;
372                 }
373                 pp->checkint = conf->checkint;
374         }
375
376         /*
377          * need path UID to go any further
378          */
379         if (memcmp(empty_buff, pp->wwid, WWID_SIZE) == 0) {
380                 condlog(0, "%s: failed to get path uid", devname);
381                 return 1; /* leave path added to pathvec */
382         }
383         if (filter_path(conf, pp) > 0){
384                 int i = find_slot(vecs->pathvec, (void *)pp);
385                 if (i != -1)
386                         vector_del_slot(vecs->pathvec, i);
387                 free_path(pp);
388                 return 2;
389         }
390         mpp = pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
391 rescan:
392         if (mpp) {
393                 if (adopt_paths(vecs->pathvec, mpp))
394                         return 1; /* leave path added to pathvec */
395
396                 verify_paths(mpp, vecs, NULL);
397                 mpp->flush_on_last_del = FLUSH_UNDEF;
398                 mpp->action = ACT_RELOAD;
399         }
400         else {
401                 if ((mpp = add_map_with_path(vecs, pp, 1)))
402                         mpp->action = ACT_CREATE;
403                 else
404                         return 1; /* leave path added to pathvec */
405         }
406
407         /*
408          * push the map to the device-mapper
409          */
410         if (setup_map(mpp)) {
411                 condlog(0, "%s: failed to setup map for addition of new "
412                         "path %s", mpp->alias, devname);
413                 goto out;
414         }
415         /*
416          * reload the map for the multipath mapped device
417          */
418         if (domap(mpp) <= 0) {
419                 condlog(0, "%s: failed in domap for addition of new "
420                         "path %s", mpp->alias, devname);
421                 /*
422                  * deal with asynchronous uevents :((
423                  */
424                 if (mpp->action == ACT_RELOAD) {
425                         condlog(0, "%s: uev_add_path sleep", mpp->alias);
426                         sleep(1);
427                         update_mpp_paths(mpp, vecs->pathvec);
428                         goto rescan;
429                 }
430                 else
431                         goto out;
432         }
433         dm_lib_release();
434
435         /*
436          * update our state from kernel regardless of create or reload
437          */
438         if (setup_multipath(vecs, mpp))
439                 goto out;
440
441         sync_map_state(mpp);
442
443         if (mpp->action == ACT_CREATE &&
444             start_waiter_thread(mpp, vecs))
445                         goto out;
446
447         condlog(2, "%s path added to devmap %s", devname, mpp->alias);
448         return 0;
449
450 out:
451         remove_map(mpp, vecs, 1);
452         return 1;
453 }
454
455 static int
456 uev_remove_path (struct sysfs_device * dev, struct vectors * vecs)
457 {
458         int retval;
459
460         condlog(2, "%s: remove path (uevent)", dev->kernel);
461         retval = ev_remove_path(dev->kernel, vecs);
462         if (!retval)
463                 sysfs_device_put(dev);
464
465         return retval;
466 }
467
468 int
469 ev_remove_path (char * devname, struct vectors * vecs)
470 {
471         struct multipath * mpp;
472         struct path * pp;
473         int i, retval = 0;
474
475         pp = find_path_by_dev(vecs->pathvec, devname);
476
477         if (!pp) {
478                 condlog(0, "%s: spurious uevent, path not in pathvec", devname);
479                 return 1;
480         }
481
482         /*
483          * avoid referring to the map of an orphaned path
484          */
485         if ((mpp = pp->mpp)) {
486                 /*
487                  * transform the mp->pg vector of vectors of paths
488                  * into a mp->params string to feed the device-mapper
489                  */
490                 if (update_mpp_paths(mpp, vecs->pathvec)) {
491                         condlog(0, "%s: failed to update paths",
492                                 mpp->alias);
493                         goto fail;
494                 }
495                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
496                         vector_del_slot(mpp->paths, i);
497
498                 /*
499                  * remove the map IFF removing the last path
500                  */
501                 if (VECTOR_SIZE(mpp->paths) == 0) {
502                         char alias[WWID_SIZE];
503
504                         /*
505                          * flush_map will fail if the device is open
506                          */
507                         strncpy(alias, mpp->alias, WWID_SIZE);
508                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
509                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
510                                 mpp->retry_tick = 0;
511                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
512                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
513                                 dm_queue_if_no_path(mpp->alias, 0);
514                         }
515                         if (!flush_map(mpp, vecs)) {
516                                 condlog(2, "%s: removed map after"
517                                         " removing all paths",
518                                         alias);
519                                 retval = 0;
520                                 goto out;
521                         }
522                         /*
523                          * Not an error, continue
524                          */
525                 }
526
527                 if (setup_map(mpp)) {
528                         condlog(0, "%s: failed to setup map for"
529                                 " removal of path %s", mpp->alias,
530                                 devname);
531                         goto fail;
532                 }
533                 /*
534                  * reload the map
535                  */
536                 mpp->action = ACT_RELOAD;
537                 if (domap(mpp) <= 0) {
538                         condlog(0, "%s: failed in domap for "
539                                 "removal of path %s",
540                                 mpp->alias, devname);
541                         retval = 1;
542                 } else {
543                         /*
544                          * update our state from kernel
545                          */
546                         if (setup_multipath(vecs, mpp)) {
547                                 goto fail;
548                         }
549                         sync_map_state(mpp);
550
551                         condlog(2, "%s: path removed from map %s",
552                                 devname, mpp->alias);
553                 }
554         }
555
556 out:
557         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
558                 vector_del_slot(vecs->pathvec, i);
559
560         free_path(pp);
561
562         return retval;
563
564 fail:
565         remove_map_and_stop_waiter(mpp, vecs, 1);
566         return 1;
567 }
568
569 static int
570 map_discovery (struct vectors * vecs)
571 {
572         struct multipath * mpp;
573         unsigned int i;
574
575         if (dm_get_maps(vecs->mpvec))
576                 return 1;
577
578         vector_foreach_slot (vecs->mpvec, mpp, i)
579                 if (setup_multipath(vecs, mpp))
580                         return 1;
581
582         return 0;
583 }
584
585 int
586 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
587 {
588         struct vectors * vecs;
589         int r;
590
591         *reply = NULL;
592         *len = 0;
593         vecs = (struct vectors *)trigger_data;
594
595         pthread_cleanup_push(cleanup_lock, &vecs->lock);
596         lock(vecs->lock);
597
598         r = parse_cmd(str, reply, len, vecs);
599
600         if (r > 0) {
601                 *reply = STRDUP("fail\n");
602                 *len = strlen(*reply) + 1;
603                 r = 1;
604         }
605         else if (!r && *len == 0) {
606                 *reply = STRDUP("ok\n");
607                 *len = strlen(*reply) + 1;
608                 r = 0;
609         }
610         /* else if (r < 0) leave *reply alone */
611
612         lock_cleanup_pop(vecs->lock);
613         return r;
614 }
615
/*
 * Tell whether a uevent should be ignored based on its devpath.
 *
 * Only block devices are kept, partitions are discarded: the devpath
 * must contain "/block/" followed by a name, and no second path
 * component after that name.
 *
 * Returns 1 to discard the event, 0 to keep it.
 */
static int
uev_discard(char * devpath)
{
        char disk[11], part[11];
        char *block = strstr(devpath, "/block/");

        if (!block) {
                condlog(4, "no /block/ in '%s'", devpath);
                return 1;
        }

        if (sscanf(block, "/block/%10s", disk) == 1 &&
            sscanf(block, "/block/%10[^/]/%10s", disk, part) != 2)
                return 0;

        condlog(4, "discard event on %s", devpath);
        return 1;
}
637
638 int
639 uev_trigger (struct uevent * uev, void * trigger_data)
640 {
641         int r = 0;
642         struct sysfs_device *sysdev;
643         struct vectors * vecs;
644
645         vecs = (struct vectors *)trigger_data;
646
647         if (uev_discard(uev->devpath))
648                 return 0;
649
650         sysdev = sysfs_device_get(uev->devpath);
651         if(!sysdev)
652                 return 0;
653
654         lock(vecs->lock);
655
656         /*
657          * device map event
658          * Add events are ignored here as the tables
659          * are not fully initialised then.
660          */
661         if (!strncmp(sysdev->kernel, "dm-", 3)) {
662                 if (!strncmp(uev->action, "change", 6)) {
663                         r = uev_add_map(sysdev, vecs);
664                         goto out;
665                 }
666                 if (!strncmp(uev->action, "remove", 6)) {
667                         r = uev_remove_map(sysdev, vecs);
668                         goto out;
669                 }
670                 if (!strncmp(uev->action, "umount", 6)) {
671                         r = uev_umount_map(sysdev, vecs);
672                         goto out;
673                 }
674                 goto out;
675         }
676
677         /*
678          * path add/remove event
679          */
680         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
681                            sysdev->kernel) > 0)
682                 goto out;
683
684         if (!strncmp(uev->action, "add", 3)) {
685                 r = uev_add_path(sysdev, vecs);
686                 goto out;
687         }
688         if (!strncmp(uev->action, "remove", 6)) {
689                 r = uev_remove_path(sysdev, vecs);
690                 goto out;
691         }
692
693 out:
694         unlock(vecs->lock);
695         return r;
696 }
697
698 static void *
699 ueventloop (void * ap)
700 {
701         block_signal(SIGUSR1, NULL);
702         block_signal(SIGHUP, NULL);
703
704         if (uevent_listen(&uev_trigger, ap))
705                 fprintf(stderr, "error starting uevent listener");
706
707         return NULL;
708 }
709
710 static void *
711 uxlsnrloop (void * ap)
712 {
713         block_signal(SIGUSR1, NULL);
714         block_signal(SIGHUP, NULL);
715
716         if (cli_init())
717                 return NULL;
718
719         set_handler_callback(LIST+PATHS, cli_list_paths);
720         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
721         set_handler_callback(LIST+MAPS, cli_list_maps);
722         set_handler_callback(LIST+STATUS, cli_list_status);
723         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
724         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
725         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
726         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
727         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
728         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
729         set_handler_callback(LIST+CONFIG, cli_list_config);
730         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
731         set_handler_callback(LIST+DEVICES, cli_list_devices);
732         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
733         set_handler_callback(ADD+PATH, cli_add_path);
734         set_handler_callback(DEL+PATH, cli_del_path);
735         set_handler_callback(ADD+MAP, cli_add_map);
736         set_handler_callback(DEL+MAP, cli_del_map);
737         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
738         set_handler_callback(RECONFIGURE, cli_reconfigure);
739         set_handler_callback(SUSPEND+MAP, cli_suspend);
740         set_handler_callback(RESUME+MAP, cli_resume);
741         set_handler_callback(RESIZE+MAP, cli_resize);
742         set_handler_callback(REINSTATE+PATH, cli_reinstate);
743         set_handler_callback(FAIL+PATH, cli_fail);
744         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
745         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
746         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
747         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
748         set_handler_callback(QUIT, cli_quit);
749
750         uxsock_listen(&uxsock_trigger, ap);
751
752         return NULL;
753 }
754
755 static int
756 exit_daemon (int status)
757 {
758         if (status != 0)
759                 fprintf(stderr, "bad exit status. see daemon.log\n");
760
761         condlog(3, "unlink pidfile");
762         unlink(DEFAULT_PIDFILE);
763
764         pthread_mutex_lock(&exit_mutex);
765         pthread_cond_signal(&exit_cond);
766         pthread_mutex_unlock(&exit_mutex);
767
768         return status;
769 }
770
771 static void
772 fail_path (struct path * pp, int del_active)
773 {
774         if (!pp->mpp)
775                 return;
776
777         condlog(2, "checker failed path %s in map %s",
778                  pp->dev_t, pp->mpp->alias);
779
780         dm_fail_path(pp->mpp->alias, pp->dev_t);
781         if (del_active)
782                 update_queue_mode_del_path(pp->mpp);
783 }
784
785 /*
786  * caller must have locked the path list before calling that function
787  */
788 static void
789 reinstate_path (struct path * pp, int add_active)
790 {
791         if (!pp->mpp)
792                 return;
793
794         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
795                 condlog(0, "%s: reinstate failed", pp->dev_t);
796         else {
797                 condlog(2, "%s: reinstated", pp->dev_t);
798                 if (add_active)
799                         update_queue_mode_add_path(pp->mpp);
800         }
801 }
802
803 static void
804 enable_group(struct path * pp)
805 {
806         struct pathgroup * pgp;
807
808         /*
809          * if path is added through uev_add_path, pgindex can be unset.
810          * next update_strings() will set it, upon map reload event.
811          *
812          * we can safely return here, because upon map reload, all
813          * PG will be enabled.
814          */
815         if (!pp->mpp->pg || !pp->pgindex)
816                 return;
817
818         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
819
820         if (pgp->status == PGSTATE_DISABLED) {
821                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
822                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
823         }
824 }
825
826 static void
827 mpvec_garbage_collector (struct vectors * vecs)
828 {
829         struct multipath * mpp;
830         unsigned int i;
831
832         if (!vecs->mpvec)
833                 return;
834
835         vector_foreach_slot (vecs->mpvec, mpp, i) {
836                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
837                         condlog(2, "%s: remove dead map", mpp->alias);
838                         remove_map_and_stop_waiter(mpp, vecs, 1);
839                         i--;
840                 }
841         }
842 }
843
844 static void
845 defered_failback_tick (vector mpvec)
846 {
847         struct multipath * mpp;
848         unsigned int i;
849
850         vector_foreach_slot (mpvec, mpp, i) {
851                 /*
852                  * defered failback getting sooner
853                  */
854                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
855                         mpp->failback_tick--;
856
857                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
858                                 switch_pathgroup(mpp);
859                 }
860         }
861 }
862
863 static void
864 retry_count_tick(vector mpvec)
865 {
866         struct multipath *mpp;
867         unsigned int i;
868
869         vector_foreach_slot (mpvec, mpp, i) {
870                 if (mpp->retry_tick) {
871                         mpp->stat_total_queueing_time++;
872                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
873                         if(--mpp->retry_tick == 0) {
874                                 dm_queue_if_no_path(mpp->alias, 0);
875                                 condlog(2, "%s: Disable queueing", mpp->alias);
876                         }
877                 }
878         }
879 }
880
881 void
882 check_path (struct vectors * vecs, struct path * pp)
883 {
884         int newstate;
885
886         if (!pp->mpp)
887                 return;
888
889         if (pp->tick && --pp->tick)
890                 return; /* don't check this path yet */
891
892         /*
893          * provision a next check soonest,
894          * in case we exit abnormaly from here
895          */
896         pp->tick = conf->checkint;
897
898         if (!checker_selected(&pp->checker)) {
899                 pathinfo(pp, conf->hwtable, DI_SYSFS);
900                 select_checker(pp);
901         }
902         if (!checker_selected(&pp->checker)) {
903                 condlog(0, "%s: checker is not set", pp->dev);
904                 return;
905         }
906         /*
907          * Set checker in async mode.
908          * Honored only by checker implementing the said mode.
909          */
910         checker_set_async(&pp->checker);
911
912         if (path_offline(pp))
913                 newstate = PATH_DOWN;
914         else
915                 newstate = checker_check(&pp->checker);
916
917         if (newstate < 0) {
918                 condlog(2, "%s: unusable path", pp->dev);
919                 pathinfo(pp, conf->hwtable, 0);
920                 return;
921         }
922         /*
923          * Async IO in flight. Keep the previous path state
924          * and reschedule as soon as possible
925          */
926         if (newstate == PATH_PENDING) {
927                 pp->tick = 1;
928                 return;
929         }
930         if (newstate != pp->state) {
931                 int oldstate = pp->state;
932                 pp->state = newstate;
933                 LOG_MSG(1, checker_message(&pp->checker));
934
935                 /*
936                  * upon state change, reset the checkint
937                  * to the shortest delay
938                  */
939                 pp->checkint = conf->checkint;
940
941                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY ||
942                     update_multipath_strings(pp->mpp, vecs->pathvec)) {
943                         /*
944                          * proactively fail path in the DM
945                          */
946                         if (oldstate == PATH_UP ||
947                             oldstate == PATH_GHOST)
948                                 fail_path(pp, 1);
949                         else
950                                 fail_path(pp, 0);
951
952                         /*
953                          * cancel scheduled failback
954                          */
955                         pp->mpp->failback_tick = 0;
956
957                         pp->mpp->stat_path_failures++;
958                         return;
959                 }
960
961                 /*
962                  * reinstate this path
963                  */
964                 if (oldstate != PATH_UP &&
965                     oldstate != PATH_GHOST)
966                         reinstate_path(pp, 1);
967                 else
968                         reinstate_path(pp, 0);
969
970                 /*
971                  * schedule [defered] failback
972                  */
973                 if (pp->mpp->pgfailback > 0)
974                         pp->mpp->failback_tick =
975                                 pp->mpp->pgfailback + 1;
976                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
977                     need_switch_pathgroup(pp->mpp, 1))
978                         switch_pathgroup(pp->mpp);
979
980                 /*
981                  * if at least one path is up in a group, and
982                  * the group is disabled, re-enable it
983                  */
984                 if (newstate == PATH_UP)
985                         enable_group(pp);
986         }
987         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
988                 LOG_MSG(4, checker_message(&pp->checker));
989                 /*
990                  * double the next check delay.
991                  * max at conf->max_checkint
992                  */
993                 if (pp->checkint < (conf->max_checkint / 2))
994                         pp->checkint = 2 * pp->checkint;
995                 else
996                         pp->checkint = conf->max_checkint;
997
998                 pp->tick = pp->checkint;
999                 condlog(4, "%s: delay next check %is",
1000                                 pp->dev_t, pp->tick);
1001         }
1002         else if (newstate == PATH_DOWN)
1003                 LOG_MSG(2, checker_message(&pp->checker));
1004
1005         pp->state = newstate;
1006
1007         /*
1008          * path prio refreshing
1009          */
1010         condlog(4, "path prio refresh");
1011         pathinfo(pp, conf->hwtable, DI_PRIO);
1012
1013         /*
1014          * pathgroup failback policy
1015          */
1016         if (need_switch_pathgroup(pp->mpp, 0)) {
1017                 if (pp->mpp->pgfailback > 0 &&
1018                     pp->mpp->failback_tick <= 0)
1019                         pp->mpp->failback_tick =
1020                                 pp->mpp->pgfailback + 1;
1021                 else if (pp->mpp->pgfailback ==
1022                                 -FAILBACK_IMMEDIATE)
1023                         switch_pathgroup(pp->mpp);
1024         }
1025 }
1026
1027 static void *
1028 checkerloop (void *ap)
1029 {
1030         struct vectors *vecs;
1031         struct path *pp;
1032         int count = 0;
1033         unsigned int i;
1034         sigset_t old;
1035
1036         mlockall(MCL_CURRENT | MCL_FUTURE);
1037         vecs = (struct vectors *)ap;
1038         condlog(2, "path checkers start up");
1039
1040         /*
1041          * init the path check interval
1042          */
1043         vector_foreach_slot (vecs->pathvec, pp, i) {
1044                 pp->checkint = conf->checkint;
1045         }
1046
1047         while (1) {
1048                 block_signal(SIGHUP, &old);
1049                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1050                 lock(vecs->lock);
1051                 condlog(4, "tick");
1052
1053                 if (vecs->pathvec) {
1054                         vector_foreach_slot (vecs->pathvec, pp, i) {
1055                                 check_path(vecs, pp);
1056                         }
1057                 }
1058                 if (vecs->mpvec) {
1059                         defered_failback_tick(vecs->mpvec);
1060                         retry_count_tick(vecs->mpvec);
1061                 }
1062                 if (count)
1063                         count--;
1064                 else {
1065                         condlog(4, "map garbage collection");
1066                         mpvec_garbage_collector(vecs);
1067                         count = MAPGCINT;
1068                 }
1069
1070                 lock_cleanup_pop(vecs->lock);
1071                 pthread_sigmask(SIG_SETMASK, &old, NULL);
1072                 sleep(1);
1073         }
1074         return NULL;
1075 }
1076
1077 int
1078 configure (struct vectors * vecs, int start_waiters)
1079 {
1080         struct multipath * mpp;
1081         struct path * pp;
1082         vector mpvec;
1083         int i;
1084
1085         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1086                 return 1;
1087
1088         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1089                 return 1;
1090
1091         if (!(mpvec = vector_alloc()))
1092                 return 1;
1093
1094         /*
1095          * probe for current path (from sysfs) and map (from dm) sets
1096          */
1097         path_discovery(vecs->pathvec, conf, DI_ALL);
1098
1099         vector_foreach_slot (vecs->pathvec, pp, i){
1100                 if (filter_path(conf, pp) > 0){
1101                         vector_del_slot(vecs->pathvec, i);
1102                         free_path(pp);
1103                         i--;
1104                 }
1105                 else
1106                         pp->checkint = conf->checkint;
1107         }
1108         if (map_discovery(vecs))
1109                 return 1;
1110
1111         /*
1112          * create new set of maps & push changed ones into dm
1113          */
1114         if (coalesce_paths(vecs, mpvec, NULL, 0))
1115                 return 1;
1116
1117         /*
1118          * may need to remove some maps which are no longer relevant
1119          * e.g., due to blacklist changes in conf file
1120          */
1121         if (coalesce_maps(vecs, mpvec))
1122                 return 1;
1123
1124         dm_lib_release();
1125
1126         sync_maps_state(mpvec);
1127
1128         /*
1129          * purge dm of old maps
1130          */
1131         remove_maps(vecs);
1132
1133         /*
1134          * save new set of maps formed by considering current path state
1135          */
1136         vector_free(vecs->mpvec);
1137         vecs->mpvec = mpvec;
1138
1139         /*
1140          * start dm event waiter threads for these new maps
1141          */
1142         vector_foreach_slot(vecs->mpvec, mpp, i) {
1143                 if (setup_multipath(vecs, mpp))
1144                         return 1;
1145                 if (start_waiters)
1146                         if (start_waiter_thread(mpp, vecs))
1147                                 return 1;
1148         }
1149         return 0;
1150 }
1151
1152 int
1153 reconfigure (struct vectors * vecs)
1154 {
1155         struct config * old = conf;
1156
1157         /*
1158          * free old map and path vectors ... they use old conf state
1159          */
1160         if (VECTOR_SIZE(vecs->mpvec))
1161                 remove_maps_and_stop_waiters(vecs);
1162
1163         if (VECTOR_SIZE(vecs->pathvec))
1164                 free_pathvec(vecs->pathvec, FREE_PATHS);
1165
1166         vecs->pathvec = NULL;
1167         conf = NULL;
1168
1169         if (load_config(DEFAULT_CONFIGFILE))
1170                 return 1;
1171
1172         conf->verbosity = old->verbosity;
1173
1174         if (!conf->checkint) {
1175                 conf->checkint = DEFAULT_CHECKINT;
1176                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1177         }
1178         configure(vecs, 1);
1179         free_config(old);
1180         return 0;
1181 }
1182
1183 static struct vectors *
1184 init_vecs (void)
1185 {
1186         struct vectors * vecs;
1187
1188         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1189
1190         if (!vecs)
1191                 return NULL;
1192
1193         vecs->lock.mutex =
1194                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1195
1196         if (!vecs->lock.mutex)
1197                 goto out;
1198
1199         pthread_mutex_init(vecs->lock.mutex, NULL);
1200         vecs->lock.depth = 0;
1201
1202         return vecs;
1203
1204 out:
1205         FREE(vecs);
1206         condlog(0, "failed to init paths");
1207         return NULL;
1208 }
1209
/*
 * Install func as the handler for signo through sigaction(), with an
 * empty signal mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when
 * sigaction() fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction act;
	struct sigaction prev;

	act.sa_handler = func;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(signo, &act, &prev) < 0)
		return (SIG_ERR);

	return (prev.sa_handler);
}
1228
1229 static void
1230 sighup (int sig)
1231 {
1232         condlog(2, "reconfigure (SIGHUP)");
1233
1234         lock(gvecs->lock);
1235         reconfigure(gvecs);
1236         unlock(gvecs->lock);
1237
1238 #ifdef _DEBUG_
1239         dbg_free_final(NULL);
1240 #endif
1241 }
1242
/*
 * Termination signal handler: ask the daemon to exit with status 0.
 * The signal number itself is not used.
 */
static void
sigend (int sig)
{
	(void)sig;

	exit_daemon(0);
}
1248
/*
 * SIGUSR1 handler: only records that the signal arrived.
 * The signal number itself is not used.
 */
static void
sigusr1 (int sig)
{
	(void)sig;

	condlog(3, "SIGUSR1 received");
}
1254
1255 static void
1256 signal_init(void)
1257 {
1258         signal_set(SIGHUP, sighup);
1259         signal_set(SIGUSR1, sigusr1);
1260         signal_set(SIGINT, sigend);
1261         signal_set(SIGTERM, sigend);
1262         signal(SIGPIPE, SIG_IGN);
1263 }
1264
1265 static void
1266 setscheduler (void)
1267 {
1268         int res;
1269         static struct sched_param sched_param = {
1270                 .sched_priority = 99
1271         };
1272
1273         res = sched_setscheduler (0, SCHED_RR, &sched_param);
1274
1275         if (res == -1)
1276                 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
1277         return;
1278 }
1279
/*
 * Write val to /proc/self/oom_adj.
 * Nothing happens when the file cannot be opened for writing.
 */
static void
set_oom_adj (int val)
{
	FILE *oom = fopen("/proc/self/oom_adj", "w");

	if (oom) {
		fprintf(oom, "%i", val);
		fclose(oom);
	}
}
1293
/*
 * Initialise a thread attribute object.
 *
 * attr      - attribute object to initialise
 * stacksize - requested stack size in bytes; raised to
 *             PTHREAD_STACK_MIN when smaller
 * detached  - non-zero to create threads in the detached state
 *
 * Any failure is fatal: a message is printed to stderr and the process
 * exits with status 1.
 */
void
setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
{
	int rc;

	/*
	 * pthread_attr_*() report failures through their return value and
	 * do not set errno, so the message is built from rc.
	 */
	rc = pthread_attr_init(attr);
	if (rc) {
		fprintf(stderr, "can't initialize thread attr: %s\n",
			strerror(rc));
		exit(1);
	}
	if (stacksize < (size_t)PTHREAD_STACK_MIN)
		stacksize = (size_t)PTHREAD_STACK_MIN;

	rc = pthread_attr_setstacksize(attr, stacksize);
	if (rc) {
		fprintf(stderr, "can't set thread stack size to %lu: %s\n",
			(unsigned long)stacksize, strerror(rc));
		exit(1);
	}
	if (detached) {
		rc = pthread_attr_setdetachstate(attr,
						 PTHREAD_CREATE_DETACHED);
		if (rc) {
			fprintf(stderr, "can't set thread to detached: %s\n",
				strerror(rc));
			exit(1);
		}
	}
}
1317
1318 static int
1319 child (void * param)
1320 {
1321         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1322         pthread_attr_t log_attr, misc_attr;
1323         struct vectors * vecs;
1324
1325         mlockall(MCL_CURRENT | MCL_FUTURE);
1326
1327         setup_thread_attr(&misc_attr, 64 * 1024, 1);
1328         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
1329
1330         if (logsink) {
1331                 setup_thread_attr(&log_attr, 64 * 1024, 0);
1332                 log_thread_start(&log_attr);
1333                 pthread_attr_destroy(&log_attr);
1334         }
1335
1336         condlog(2, "--------start up--------");
1337         condlog(2, "read " DEFAULT_CONFIGFILE);
1338
1339         if (load_config(DEFAULT_CONFIGFILE))
1340                 exit(1);
1341
1342         if (init_checkers()) {
1343                 condlog(0, "failed to initialize checkers");
1344                 exit(1);
1345         }
1346         if (init_prio()) {
1347                 condlog(0, "failed to initialize prioritizers");
1348                 exit(1);
1349         }
1350
1351         setlogmask(LOG_UPTO(conf->verbosity + 3));
1352
1353         /*
1354          * fill the voids left in the config file
1355          */
1356         if (!conf->checkint) {
1357                 conf->checkint = DEFAULT_CHECKINT;
1358                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1359         }
1360
1361         if (conf->max_fds) {
1362                 struct rlimit fd_limit;
1363                 if (conf->max_fds > 0) {
1364                         fd_limit.rlim_cur = conf->max_fds;
1365                         fd_limit.rlim_max = conf->max_fds;
1366                 }
1367                 else {
1368                         fd_limit.rlim_cur = RLIM_INFINITY;
1369                         fd_limit.rlim_max = RLIM_INFINITY;
1370                 }
1371                 if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0)
1372                         condlog(0, "can't set open fds limit to %d : %s\n",
1373                                 conf->max_fds, strerror(errno));
1374         }
1375
1376         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1377                 if (logsink)
1378                         log_thread_stop();
1379
1380                 exit(1);
1381         }
1382         signal_init();
1383         setscheduler();
1384         set_oom_adj(-16);
1385         vecs = gvecs = init_vecs();
1386
1387         if (!vecs)
1388                 exit(1);
1389
1390         if (sysfs_init(conf->sysfs_dir, FILE_NAME_SIZE)) {
1391                 condlog(0, "can not find sysfs mount point");
1392                 exit(1);
1393         }
1394         conf->daemon = 1;
1395         /*
1396          * fetch and configure both paths and multipaths
1397          */
1398         if (configure(vecs, 1)) {
1399                 condlog(0, "failure during configuration");
1400                 exit(1);
1401         }
1402         /*
1403          * start threads
1404          */
1405         pthread_create(&check_thr, &misc_attr, checkerloop, vecs);
1406         pthread_create(&uevent_thr, &misc_attr, ueventloop, vecs);
1407         pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
1408         pthread_attr_destroy(&misc_attr);
1409
1410         pthread_cond_wait(&exit_cond, &exit_mutex);
1411
1412         /*
1413          * exit path
1414          */
1415         block_signal(SIGHUP, NULL);
1416         lock(vecs->lock);
1417         remove_maps_and_stop_waiters(vecs);
1418         free_pathvec(vecs->pathvec, FREE_PATHS);
1419
1420         pthread_cancel(check_thr);
1421         pthread_cancel(uevent_thr);
1422         pthread_cancel(uxlsnr_thr);
1423
1424         sysfs_cleanup();
1425
1426         free_keys(keys);
1427         keys = NULL;
1428         free_handlers(handlers);
1429         handlers = NULL;
1430         free_polls();
1431
1432         unlock(vecs->lock);
1433         /* Now all the waitevent threads will start rushing in. */
1434         while (vecs->lock.depth > 0) {
1435                 sleep (1); /* This is weak. */
1436                 condlog(3,"Have %d wait event checkers threads to de-alloc, waiting..\n", vecs->lock.depth);
1437         }
1438         pthread_mutex_destroy(vecs->lock.mutex);
1439         FREE(vecs->lock.mutex);
1440         vecs->lock.depth = 0;
1441         vecs->lock.mutex = NULL;
1442         FREE(vecs);
1443         vecs = NULL;
1444
1445         condlog(2, "--------shut down-------");
1446
1447         if (logsink)
1448                 log_thread_stop();
1449
1450         dm_lib_release();
1451         dm_lib_exit();
1452
1453         cleanup_prio();
1454         cleanup_checkers();
1455         /*
1456          * Freeing config must be done after condlog() and dm_lib_exit(),
1457          * because logging functions like dlog() and dm_write_log()
1458          * reference the config.
1459          */
1460         free_config(conf);
1461         conf = NULL;
1462
1463 #ifdef _DEBUG_
1464         dbg_free_final(NULL);
1465 #endif
1466
1467         exit(0);
1468 }
1469
/*
 * Detach from the controlling terminal with the usual double fork.
 *
 * Returns the first child's pid (> 0) in the original process, -1 when
 * the first fork fails, and 0 in the process that carries on as the
 * daemon. In that process stdin is redirected to /dev/null, stdout and
 * stderr to /dev/console, and the working directory becomes "/".
 */
static int
daemonize(void)
{
	int pid;
	int in_fd, out_fd;

	if( (pid = fork()) < 0){
		fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
		return -1;
	}
	else if (pid != 0)
		return pid;

	setsid();

	/* a failed second fork is reported but not fatal */
	if ( (pid = fork()) < 0)
		fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
	else if (pid != 0)
		_exit(0);

	in_fd = open("/dev/null", O_RDONLY);
	if (in_fd < 0){
		fprintf(stderr, "cannot open /dev/null for input : %s\n",
			strerror(errno));
		_exit(0);
	}
	out_fd = open("/dev/console", O_WRONLY);
	if (out_fd < 0){
		fprintf(stderr, "cannot open /dev/console for output : %s\n",
			strerror(errno));
		_exit(0);
	}

	/*
	 * dup2() retargets each standard descriptor in one step and is a
	 * no-op when source and target already coincide, which happens
	 * when the daemon was started with a standard descriptor closed.
	 * Failures are reported but, as before, not fatal.
	 */
	if (dup2(in_fd, STDIN_FILENO) < 0)
		fprintf(stderr, "cannot redirect stdin : %s\n",
			strerror(errno));
	if (dup2(out_fd, STDOUT_FILENO) < 0)
		fprintf(stderr, "cannot redirect stdout : %s\n",
			strerror(errno));
	if (dup2(out_fd, STDERR_FILENO) < 0)
		fprintf(stderr, "cannot redirect stderr : %s\n",
			strerror(errno));

	/* never close a descriptor that is now one of the standard three */
	if (in_fd > STDERR_FILENO)
		close(in_fd);
	if (out_fd > STDERR_FILENO)
		close(out_fd);

	if (chdir("/") < 0)
		fprintf(stderr, "cannot chdir to '/', continuing\n");

	return 0;
}
1517
1518 int
1519 main (int argc, char *argv[])
1520 {
1521         extern char *optarg;
1522         extern int optind;
1523         int arg;
1524         int err;
1525
1526         logsink = 1;
1527         dm_init();
1528
1529         if (getuid() != 0) {
1530                 fprintf(stderr, "need to be root\n");
1531                 exit(1);
1532         }
1533
1534         /* make sure we don't lock any path */
1535         chdir("/");
1536         umask(umask(077) | 022);
1537
1538         conf = alloc_config();
1539
1540         if (!conf)
1541                 exit(1);
1542
1543         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1544         switch(arg) {
1545                 case 'd':
1546                         logsink = 0;
1547                         //debug=1; /* ### comment me out ### */
1548                         break;
1549                 case 'v':
1550                         if (sizeof(optarg) > sizeof(char *) ||
1551                             !isdigit(optarg[0]))
1552                                 exit(1);
1553
1554                         conf->verbosity = atoi(optarg);
1555                         break;
1556                 case 'k':
1557                         uxclnt(optarg);
1558                         exit(0);
1559                 default:
1560                         ;
1561                 }
1562         }
1563         if (optind < argc) {
1564                 char cmd[CMDSIZE];
1565                 char * s = cmd;
1566                 char * c = s;
1567
1568                 while (optind < argc) {
1569                         if (strchr(argv[optind], ' '))
1570                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
1571                         else
1572                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
1573                         optind++;
1574                 }
1575                 c += snprintf(c, s + CMDSIZE - c, "\n");
1576                 uxclnt(s);
1577                 exit(0);
1578         }
1579
1580         if (!logsink)
1581                 err = 0;
1582         else
1583                 err = daemonize();
1584
1585         if (err < 0)
1586                 /* error */
1587                 exit(1);
1588         else if (err > 0)
1589                 /* parent dies */
1590                 exit(0);
1591         else
1592                 /* child lives */
1593                 return (child(NULL));
1594 }
1595