multipath-tools: Improvement to max_fds
[platform/upstream/multipath-tools.git] / multipathd / main.c
1 /*
2  * Copyright (c) 2004, 2005 Christophe Varoqui
3  * Copyright (c) 2005 Kiyoshi Ueda, NEC
4  * Copyright (c) 2005 Benjamin Marzinski, Redhat
5  * Copyright (c) 2005 Edward Goggin, EMC
6  */
7 #include <unistd.h>
8 #include <sys/stat.h>
9 #include <libdevmapper.h>
10 #include <wait.h>
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/time.h>
16 #include <sys/resource.h>
17 #include <limits.h>
18
19 /*
20  * libcheckers
21  */
22 #include <checkers.h>
23
24 /*
25  * libmultipath
26  */
27 #include <parser.h>
28 #include <vector.h>
29 #include <memory.h>
30 #include <config.h>
31 #include <util.h>
32 #include <hwtable.h>
33 #include <defaults.h>
34 #include <structs.h>
35 #include <callout.h>
36 #include <blacklist.h>
37 #include <structs_vec.h>
38 #include <dmparser.h>
39 #include <devmapper.h>
40 #include <sysfs.h>
41 #include <dict.h>
42 #include <discovery.h>
43 #include <debug.h>
44 #include <propsel.h>
45 #include <uevent.h>
46 #include <switchgroup.h>
47 #include <print.h>
48 #include <configure.h>
49 #include <prio.h>
50
51 #include "main.h"
52 #include "pidfile.h"
53 #include "uxlsnr.h"
54 #include "uxclnt.h"
55 #include "cli.h"
56 #include "cli_handlers.h"
57 #include "lock.h"
58 #include "waiter.h"
59
60 #define FILE_NAME_SIZE 256
61 #define CMDSIZE 160
62
/*
 * Log message b at priority a, prefixed with the device name of the
 * path "pp", which must be in scope where the macro is expanded.
 * Empty messages are not logged.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: it can be followed by ';' and used in an unbraced
 * if/else without capturing the else branch.
 */
#define LOG_MSG(a,b) \
        do { \
                if (strlen(b)) \
                        condlog(a, "%s: %s", pp->dev, b); \
        } while (0)
65
66 pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
67 pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
68
69 int logsink;
70
71 /*
72  * global copy of vecs for use in sig handlers
73  */
74 struct vectors * gvecs;
75
76 static int
77 need_switch_pathgroup (struct multipath * mpp, int refresh)
78 {
79         struct pathgroup * pgp;
80         struct path * pp;
81         unsigned int i, j;
82
83         if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
84                 return 0;
85
86         /*
87          * Refresh path priority values
88          */
89         if (refresh)
90                 vector_foreach_slot (mpp->pg, pgp, i)
91                         vector_foreach_slot (pgp->paths, pp, j)
92                                 pathinfo(pp, conf->hwtable, DI_PRIO);
93
94         mpp->bestpg = select_path_group(mpp);
95
96         if (mpp->bestpg != mpp->nextpg)
97                 return 1;
98
99         return 0;
100 }
101
102 static void
103 switch_pathgroup (struct multipath * mpp)
104 {
105         mpp->stat_switchgroup++;
106         dm_switchgroup(mpp->alias, mpp->bestpg);
107         condlog(2, "%s: switch to path group #%i",
108                  mpp->alias, mpp->bestpg);
109 }
110
111 static int
112 coalesce_maps(struct vectors *vecs, vector nmpv)
113 {
114         struct multipath * ompp;
115         vector ompv = vecs->mpvec;
116         unsigned int i;
117         int j;
118
119         vector_foreach_slot (ompv, ompp, i) {
120                 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
121                         /*
122                          * remove all current maps not allowed by the
123                          * current configuration
124                          */
125                         if (dm_flush_map(ompp->alias)) {
126                                 condlog(0, "%s: unable to flush devmap",
127                                         ompp->alias);
128                                 /*
129                                  * may be just because the device is open
130                                  */
131                                 if (!vector_alloc_slot(nmpv))
132                                         return 1;
133
134                                 vector_set_slot(nmpv, ompp);
135                                 setup_multipath(vecs, ompp);
136
137                                 if ((j = find_slot(ompv, (void *)ompp)) != -1)
138                                         vector_del_slot(ompv, j);
139
140                                 continue;
141                         }
142                         else {
143                                 dm_lib_release();
144                                 condlog(2, "%s devmap removed", ompp->alias);
145                         }
146                 }
147         }
148         return 0;
149 }
150
151 void
152 sync_map_state(struct multipath *mpp)
153 {
154         struct pathgroup *pgp;
155         struct path *pp;
156         unsigned int i, j;
157
158         if (!mpp->pg)
159                 return;
160
161         vector_foreach_slot (mpp->pg, pgp, i){
162                 vector_foreach_slot (pgp->paths, pp, j){
163                         if (pp->state == PATH_UNCHECKED || 
164                             pp->state == PATH_WILD)
165                                 continue;
166                         if ((pp->dmstate == PSTATE_FAILED ||
167                              pp->dmstate == PSTATE_UNDEF) &&
168                             (pp->state == PATH_UP || pp->state == PATH_GHOST))
169                                 dm_reinstate_path(mpp->alias, pp->dev_t);
170                         else if ((pp->dmstate == PSTATE_ACTIVE ||
171                                   pp->dmstate == PSTATE_UNDEF) &&
172                                  (pp->state == PATH_DOWN ||
173                                   pp->state == PATH_SHAKY))
174                                 dm_fail_path(mpp->alias, pp->dev_t);
175                 }
176         }
177 }
178
179 static void
180 sync_maps_state(vector mpvec)
181 {
182         unsigned int i;
183         struct multipath *mpp;
184
185         vector_foreach_slot (mpvec, mpp, i)
186                 sync_map_state(mpp);
187 }
188
189 static int
190 flush_map(struct multipath * mpp, struct vectors * vecs)
191 {
192         /*
193          * clear references to this map before flushing so we can ignore
194          * the spurious uevent we may generate with the dm_flush_map call below
195          */
196         if (dm_flush_map(mpp->alias)) {
197                 /*
198                  * May not really be an error -- if the map was already flushed
199                  * from the device mapper by dmsetup(8) for instance.
200                  */
201                 condlog(0, "%s: can't flush", mpp->alias);
202                 return 1;
203         }
204         else {
205                 dm_lib_release();
206                 condlog(2, "%s: devmap removed", mpp->alias);
207         }
208
209         orphan_paths(vecs->pathvec, mpp);
210         remove_map_and_stop_waiter(mpp, vecs, 1);
211
212         return 0;
213 }
214
215 static int
216 uev_add_map (struct sysfs_device * dev, struct vectors * vecs)
217 {
218         condlog(2, "%s: add map (uevent)", dev->kernel);
219         return ev_add_map(dev, vecs);
220 }
221
222 int
223 ev_add_map (struct sysfs_device * dev, struct vectors * vecs)
224 {
225         char * alias;
226         char *dev_t;
227         int major, minor;
228         char * refwwid;
229         struct multipath * mpp;
230         int map_present;
231         int r = 1;
232
233         dev_t = sysfs_attr_get_value(dev->devpath, "dev");
234
235         if (!dev_t || sscanf(dev_t, "%d:%d", &major, &minor) != 2)
236                 return 1;
237
238         alias = dm_mapname(major, minor);
239
240         if (!alias)
241                 return 1;
242
243         map_present = dm_map_present(alias);
244
245         if (map_present && dm_type(alias, TGT_MPATH) <= 0) {
246                 condlog(4, "%s: not a multipath map", alias);
247                 FREE(alias);
248                 return 0;
249         }
250
251         mpp = find_mp_by_alias(vecs->mpvec, alias);
252
253         if (mpp) {
254                 /*
255                  * Not really an error -- we generate our own uevent
256                  * if we create a multipath mapped device as a result
257                  * of uev_add_path
258                  */
259                 condlog(0, "%s: devmap already registered",
260                         dev->kernel);
261                 FREE(alias);
262                 return 0;
263         }
264
265         /*
266          * now we can register the map
267          */
268         if (map_present && (mpp = add_map_without_path(vecs, minor, alias))) {
269                 sync_map_state(mpp);
270                 condlog(2, "%s: devmap %s added", alias, dev->kernel);
271                 return 0;
272         }
273         refwwid = get_refwwid(dev->kernel, DEV_DEVMAP, vecs->pathvec);
274
275         if (refwwid) {
276                 r = coalesce_paths(vecs, NULL, refwwid, 0);
277                 dm_lib_release();
278         }
279
280         if (!r)
281                 condlog(2, "%s: devmap %s added", alias, dev->kernel);
282         else
283                 condlog(0, "%s: uev_add_map %s failed", alias, dev->kernel);
284
285         FREE(refwwid);
286         FREE(alias);
287         return r;
288 }
289
290 static int
291 uev_remove_map (struct sysfs_device * dev, struct vectors * vecs)
292 {
293         condlog(2, "%s: remove map (uevent)", dev->kernel);
294         return ev_remove_map(dev->kernel, vecs);
295 }
296
297 int
298 ev_remove_map (char * devname, struct vectors * vecs)
299 {
300         struct multipath * mpp;
301
302         mpp = find_mp_by_str(vecs->mpvec, devname);
303
304         if (!mpp) {
305                 condlog(2, "%s: devmap not registered, can't remove",
306                         devname);
307                 return 0;
308         }
309         flush_map(mpp, vecs);
310
311         return 0;
312 }
313
314 static int
315 uev_umount_map (struct sysfs_device * dev, struct vectors * vecs)
316 {
317         struct multipath * mpp;
318
319         condlog(2, "%s: umount map (uevent)", dev->kernel);
320
321         mpp = find_mp_by_str(vecs->mpvec, dev->kernel);
322
323         if (!mpp)
324                 return 0;
325
326         update_mpp_paths(mpp, vecs->pathvec);
327         verify_paths(mpp, vecs, NULL);
328
329         if (!VECTOR_SIZE(mpp->paths))
330                 flush_map(mpp, vecs);
331
332         return 0;
333 }
334
335 static int
336 uev_add_path (struct sysfs_device * dev, struct vectors * vecs)
337 {
338         condlog(2, "%s: add path (uevent)", dev->kernel);
339         return (ev_add_path(dev->kernel, vecs) != 1)? 0 : 1;
340 }
341
342
343 /*
344  * returns:
345  * 0: added
346  * 1: error
347  * 2: blacklisted
348  */
349 int
350 ev_add_path (char * devname, struct vectors * vecs)
351 {
352         struct multipath * mpp;
353         struct path * pp;
354         char empty_buff[WWID_SIZE] = {0};
355
356         if (strstr(devname, "..") != NULL) {
357                 /*
358                  * Don't allow relative device names in the pathvec
359                  */
360                 condlog(0, "%s: path name is invalid", devname);
361                 return 1;
362         }
363
364         pp = find_path_by_dev(vecs->pathvec, devname);
365
366         if (pp) {
367                 condlog(0, "%s: spurious uevent, path already in pathvec",
368                         devname);
369                 if (pp->mpp)
370                         return 0;
371         }
372         else {
373                 /*
374                  * get path vital state
375                  */
376                 if (!(pp = store_pathinfo(vecs->pathvec, conf->hwtable,
377                       devname, DI_ALL))) {
378                         condlog(0, "%s: failed to store path info", devname);
379                         return 1;
380                 }
381                 pp->checkint = conf->checkint;
382         }
383
384         /*
385          * need path UID to go any further
386          */
387         if (memcmp(empty_buff, pp->wwid, WWID_SIZE) == 0) {
388                 condlog(0, "%s: failed to get path uid", devname);
389                 return 1; /* leave path added to pathvec */
390         }
391         if (filter_path(conf, pp) > 0){
392                 int i = find_slot(vecs->pathvec, (void *)pp);
393                 if (i != -1)
394                         vector_del_slot(vecs->pathvec, i);
395                 free_path(pp);
396                 return 2;
397         }
398         mpp = pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
399 rescan:
400         if (mpp) {
401                 condlog(4,"%s: adopting all paths for path %s",
402                         mpp->alias, pp->dev);
403                 if (adopt_paths(vecs->pathvec, mpp))
404                         return 1; /* leave path added to pathvec */
405
406                 verify_paths(mpp, vecs, NULL);
407                 mpp->flush_on_last_del = FLUSH_UNDEF;
408                 mpp->action = ACT_RELOAD;
409         }
410         else {
411                 condlog(4,"%s: creating new map", pp->dev);
412                 if ((mpp = add_map_with_path(vecs, pp, 1)))
413                         mpp->action = ACT_CREATE;
414                 else
415                         return 1; /* leave path added to pathvec */
416         }
417
418         /*
419          * push the map to the device-mapper
420          */
421         if (setup_map(mpp)) {
422                 condlog(0, "%s: failed to setup map for addition of new "
423                         "path %s", mpp->alias, devname);
424                 goto out;
425         }
426         /*
427          * reload the map for the multipath mapped device
428          */
429         if (domap(mpp) <= 0) {
430                 condlog(0, "%s: failed in domap for addition of new "
431                         "path %s", mpp->alias, devname);
432                 /*
433                  * deal with asynchronous uevents :((
434                  */
435                 if (mpp->action == ACT_RELOAD) {
436                         condlog(0, "%s: uev_add_path sleep", mpp->alias);
437                         sleep(1);
438                         update_mpp_paths(mpp, vecs->pathvec);
439                         goto rescan;
440                 }
441                 else
442                         goto out;
443         }
444         dm_lib_release();
445
446         /*
447          * update our state from kernel regardless of create or reload
448          */
449         if (setup_multipath(vecs, mpp))
450                 goto out;
451
452         sync_map_state(mpp);
453
454         if (mpp->action == ACT_CREATE &&
455             start_waiter_thread(mpp, vecs))
456                         goto out;
457
458         condlog(2, "%s path added to devmap %s", devname, mpp->alias);
459         return 0;
460
461 out:
462         remove_map(mpp, vecs, 1);
463         return 1;
464 }
465
466 static int
467 uev_remove_path (struct sysfs_device * dev, struct vectors * vecs)
468 {
469         int retval;
470
471         condlog(2, "%s: remove path (uevent)", dev->kernel);
472         retval = ev_remove_path(dev->kernel, vecs);
473         if (!retval)
474                 sysfs_device_put(dev);
475
476         return retval;
477 }
478
479 int
480 ev_remove_path (char * devname, struct vectors * vecs)
481 {
482         struct multipath * mpp;
483         struct path * pp;
484         int i, retval = 0;
485
486         pp = find_path_by_dev(vecs->pathvec, devname);
487
488         if (!pp) {
489                 /* Not an error; path might have been purged earlier */
490                 condlog(0, "%s: path already removed", devname);
491                 return 0;
492         }
493
494         /*
495          * avoid referring to the map of an orphaned path
496          */
497         if ((mpp = pp->mpp)) {
498                 /*
499                  * transform the mp->pg vector of vectors of paths
500                  * into a mp->params string to feed the device-mapper
501                  */
502                 if (update_mpp_paths(mpp, vecs->pathvec)) {
503                         condlog(0, "%s: failed to update paths",
504                                 mpp->alias);
505                         goto fail;
506                 }
507                 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
508                         vector_del_slot(mpp->paths, i);
509
510                 /*
511                  * remove the map IFF removing the last path
512                  */
513                 if (VECTOR_SIZE(mpp->paths) == 0) {
514                         char alias[WWID_SIZE];
515
516                         /*
517                          * flush_map will fail if the device is open
518                          */
519                         strncpy(alias, mpp->alias, WWID_SIZE);
520                         if (mpp->flush_on_last_del == FLUSH_ENABLED) {
521                                 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
522                                 mpp->retry_tick = 0;
523                                 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
524                                 mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
525                                 dm_queue_if_no_path(mpp->alias, 0);
526                         }
527                         if (!flush_map(mpp, vecs)) {
528                                 condlog(2, "%s: removed map after"
529                                         " removing all paths",
530                                         alias);
531                                 retval = 0;
532                                 goto out;
533                         }
534                         /*
535                          * Not an error, continue
536                          */
537                 }
538
539                 if (setup_map(mpp)) {
540                         condlog(0, "%s: failed to setup map for"
541                                 " removal of path %s", mpp->alias,
542                                 devname);
543                         goto fail;
544                 }
545                 /*
546                  * reload the map
547                  */
548                 mpp->action = ACT_RELOAD;
549                 if (domap(mpp) <= 0) {
550                         condlog(0, "%s: failed in domap for "
551                                 "removal of path %s",
552                                 mpp->alias, devname);
553                         retval = 1;
554                 } else {
555                         /*
556                          * update our state from kernel
557                          */
558                         if (setup_multipath(vecs, mpp)) {
559                                 goto fail;
560                         }
561                         sync_map_state(mpp);
562
563                         condlog(2, "%s: path removed from map %s",
564                                 devname, mpp->alias);
565                 }
566         }
567
568 out:
569         if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
570                 vector_del_slot(vecs->pathvec, i);
571
572         free_path(pp);
573
574         return retval;
575
576 fail:
577         remove_map_and_stop_waiter(mpp, vecs, 1);
578         return 1;
579 }
580
581 static int
582 map_discovery (struct vectors * vecs)
583 {
584         struct multipath * mpp;
585         unsigned int i;
586
587         if (dm_get_maps(vecs->mpvec))
588                 return 1;
589
590         vector_foreach_slot (vecs->mpvec, mpp, i)
591                 if (setup_multipath(vecs, mpp))
592                         return 1;
593
594         return 0;
595 }
596
597 int
598 uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
599 {
600         struct vectors * vecs;
601         int r;
602
603         *reply = NULL;
604         *len = 0;
605         vecs = (struct vectors *)trigger_data;
606
607         pthread_cleanup_push(cleanup_lock, &vecs->lock);
608         lock(vecs->lock);
609
610         r = parse_cmd(str, reply, len, vecs);
611
612         if (r > 0) {
613                 *reply = STRDUP("fail\n");
614                 *len = strlen(*reply) + 1;
615                 r = 1;
616         }
617         else if (!r && *len == 0) {
618                 *reply = STRDUP("ok\n");
619                 *len = strlen(*reply) + 1;
620                 r = 0;
621         }
622         /* else if (r < 0) leave *reply alone */
623
624         lock_cleanup_pop(vecs->lock);
625         return r;
626 }
627
/*
 * Tell whether the uevent for devpath must be ignored.
 *
 * Only devpaths with a "/block/<name>" component and nothing parsed
 * below it are kept; a devpath of the form "/block/<name>/<sub>" (a
 * partition) is discarded.
 *
 * Returns 1 to discard the event, 0 to keep it.
 */
static int
uev_discard(char * devpath)
{
        char *blk;
        char name[11], sub[11];
        int keep;

        blk = strstr(devpath, "/block/");
        if (!blk) {
                condlog(4, "no /block/ in '%s'", devpath);
                return 1;
        }

        /*
         * keep only block devices, discard partitions
         */
        keep = sscanf(blk, "/block/%10s", name) == 1 &&
               sscanf(blk, "/block/%10[^/]/%10s", name, sub) != 2;
        if (!keep) {
                condlog(4, "discard event on %s", devpath);
                return 1;
        }

        return 0;
}
649
650 int
651 uev_trigger (struct uevent * uev, void * trigger_data)
652 {
653         int r = 0;
654         struct sysfs_device *sysdev;
655         struct vectors * vecs;
656
657         vecs = (struct vectors *)trigger_data;
658
659         if (uev_discard(uev->devpath))
660                 return 0;
661
662         sysdev = sysfs_device_get(uev->devpath);
663         if(!sysdev)
664                 return 0;
665
666         lock(vecs->lock);
667
668         /*
669          * device map event
670          * Add events are ignored here as the tables
671          * are not fully initialised then.
672          */
673         if (!strncmp(sysdev->kernel, "dm-", 3)) {
674                 if (!strncmp(uev->action, "change", 6)) {
675                         r = uev_add_map(sysdev, vecs);
676                         goto out;
677                 }
678                 if (!strncmp(uev->action, "remove", 6)) {
679                         r = uev_remove_map(sysdev, vecs);
680                         goto out;
681                 }
682                 if (!strncmp(uev->action, "umount", 6)) {
683                         r = uev_umount_map(sysdev, vecs);
684                         goto out;
685                 }
686                 goto out;
687         }
688
689         /*
690          * path add/remove event
691          */
692         if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
693                            sysdev->kernel) > 0)
694                 goto out;
695
696         if (!strncmp(uev->action, "add", 3)) {
697                 r = uev_add_path(sysdev, vecs);
698                 goto out;
699         }
700         if (!strncmp(uev->action, "remove", 6)) {
701                 r = uev_remove_path(sysdev, vecs);
702                 goto out;
703         }
704
705 out:
706         unlock(vecs->lock);
707         return r;
708 }
709
710 static void *
711 ueventloop (void * ap)
712 {
713         block_signal(SIGUSR1, NULL);
714         block_signal(SIGHUP, NULL);
715
716         if (uevent_listen(&uev_trigger, ap))
717                 fprintf(stderr, "error starting uevent listener");
718
719         return NULL;
720 }
721
722 static void *
723 uxlsnrloop (void * ap)
724 {
725         block_signal(SIGUSR1, NULL);
726         block_signal(SIGHUP, NULL);
727
728         if (cli_init())
729                 return NULL;
730
731         set_handler_callback(LIST+PATHS, cli_list_paths);
732         set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
733         set_handler_callback(LIST+MAPS, cli_list_maps);
734         set_handler_callback(LIST+STATUS, cli_list_status);
735         set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
736         set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
737         set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
738         set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
739         set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
740         set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
741         set_handler_callback(LIST+CONFIG, cli_list_config);
742         set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
743         set_handler_callback(LIST+DEVICES, cli_list_devices);
744         set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
745         set_handler_callback(ADD+PATH, cli_add_path);
746         set_handler_callback(DEL+PATH, cli_del_path);
747         set_handler_callback(ADD+MAP, cli_add_map);
748         set_handler_callback(DEL+MAP, cli_del_map);
749         set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
750         set_handler_callback(RECONFIGURE, cli_reconfigure);
751         set_handler_callback(SUSPEND+MAP, cli_suspend);
752         set_handler_callback(RESUME+MAP, cli_resume);
753         set_handler_callback(RESIZE+MAP, cli_resize);
754         set_handler_callback(REINSTATE+PATH, cli_reinstate);
755         set_handler_callback(FAIL+PATH, cli_fail);
756         set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
757         set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
758         set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
759         set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
760         set_handler_callback(QUIT, cli_quit);
761
762         umask(077);
763         uxsock_listen(&uxsock_trigger, ap);
764
765         return NULL;
766 }
767
768 static int
769 exit_daemon (int status)
770 {
771         if (status != 0)
772                 fprintf(stderr, "bad exit status. see daemon.log\n");
773
774         condlog(3, "unlink pidfile");
775         unlink(DEFAULT_PIDFILE);
776
777         pthread_mutex_lock(&exit_mutex);
778         pthread_cond_signal(&exit_cond);
779         pthread_mutex_unlock(&exit_mutex);
780
781         return status;
782 }
783
784 static void
785 fail_path (struct path * pp, int del_active)
786 {
787         if (!pp->mpp)
788                 return;
789
790         condlog(2, "checker failed path %s in map %s",
791                  pp->dev_t, pp->mpp->alias);
792
793         dm_fail_path(pp->mpp->alias, pp->dev_t);
794         if (del_active)
795                 update_queue_mode_del_path(pp->mpp);
796 }
797
798 /*
799  * caller must have locked the path list before calling that function
800  */
801 static void
802 reinstate_path (struct path * pp, int add_active)
803 {
804         if (!pp->mpp)
805                 return;
806
807         if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
808                 condlog(0, "%s: reinstate failed", pp->dev_t);
809         else {
810                 condlog(2, "%s: reinstated", pp->dev_t);
811                 if (add_active)
812                         update_queue_mode_add_path(pp->mpp);
813         }
814 }
815
816 static void
817 enable_group(struct path * pp)
818 {
819         struct pathgroup * pgp;
820
821         /*
822          * if path is added through uev_add_path, pgindex can be unset.
823          * next update_strings() will set it, upon map reload event.
824          *
825          * we can safely return here, because upon map reload, all
826          * PG will be enabled.
827          */
828         if (!pp->mpp->pg || !pp->pgindex)
829                 return;
830
831         pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
832
833         if (pgp->status == PGSTATE_DISABLED) {
834                 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
835                 dm_enablegroup(pp->mpp->alias, pp->pgindex);
836         }
837 }
838
839 static void
840 mpvec_garbage_collector (struct vectors * vecs)
841 {
842         struct multipath * mpp;
843         unsigned int i;
844
845         if (!vecs->mpvec)
846                 return;
847
848         vector_foreach_slot (vecs->mpvec, mpp, i) {
849                 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
850                         condlog(2, "%s: remove dead map", mpp->alias);
851                         remove_map_and_stop_waiter(mpp, vecs, 1);
852                         i--;
853                 }
854         }
855 }
856
857 static void
858 defered_failback_tick (vector mpvec)
859 {
860         struct multipath * mpp;
861         unsigned int i;
862
863         vector_foreach_slot (mpvec, mpp, i) {
864                 /*
865                  * defered failback getting sooner
866                  */
867                 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
868                         mpp->failback_tick--;
869
870                         if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
871                                 switch_pathgroup(mpp);
872                 }
873         }
874 }
875
876 static void
877 retry_count_tick(vector mpvec)
878 {
879         struct multipath *mpp;
880         unsigned int i;
881
882         vector_foreach_slot (mpvec, mpp, i) {
883                 if (mpp->retry_tick) {
884                         mpp->stat_total_queueing_time++;
885                         condlog(4, "%s: Retrying.. No active path", mpp->alias);
886                         if(--mpp->retry_tick == 0) {
887                                 dm_queue_if_no_path(mpp->alias, 0);
888                                 condlog(2, "%s: Disable queueing", mpp->alias);
889                         }
890                 }
891         }
892 }
893
894 void
895 check_path (struct vectors * vecs, struct path * pp)
896 {
897         int newstate;
898
899         if (!pp->mpp)
900                 return;
901
902         if (pp->tick && --pp->tick)
903                 return; /* don't check this path yet */
904
905         /*
906          * provision a next check soonest,
907          * in case we exit abnormaly from here
908          */
909         pp->tick = conf->checkint;
910
911         if (!checker_selected(&pp->checker)) {
912                 pathinfo(pp, conf->hwtable, DI_SYSFS);
913                 select_checker(pp);
914         }
915         if (!checker_selected(&pp->checker)) {
916                 condlog(0, "%s: checker is not set", pp->dev);
917                 return;
918         }
919         /*
920          * Set checker in async mode.
921          * Honored only by checker implementing the said mode.
922          */
923         checker_set_async(&pp->checker);
924
925         if (path_offline(pp))
926                 newstate = PATH_DOWN;
927         else
928                 newstate = checker_check(&pp->checker);
929
930         if (newstate < 0) {
931                 condlog(2, "%s: unusable path", pp->dev);
932                 pathinfo(pp, conf->hwtable, 0);
933                 return;
934         }
935         /*
936          * Async IO in flight. Keep the previous path state
937          * and reschedule as soon as possible
938          */
939         if (newstate == PATH_PENDING) {
940                 pp->tick = 1;
941                 return;
942         }
943         if (newstate != pp->state) {
944                 int oldstate = pp->state;
945                 pp->state = newstate;
946                 LOG_MSG(1, checker_message(&pp->checker));
947
948                 /*
949                  * upon state change, reset the checkint
950                  * to the shortest delay
951                  */
952                 pp->checkint = conf->checkint;
953
954                 if (newstate == PATH_DOWN || newstate == PATH_SHAKY ||
955                     update_multipath_strings(pp->mpp, vecs->pathvec)) {
956                         /*
957                          * proactively fail path in the DM
958                          */
959                         if (oldstate == PATH_UP ||
960                             oldstate == PATH_GHOST)
961                                 fail_path(pp, 1);
962                         else
963                                 fail_path(pp, 0);
964
965                         /*
966                          * cancel scheduled failback
967                          */
968                         pp->mpp->failback_tick = 0;
969
970                         pp->mpp->stat_path_failures++;
971                         return;
972                 }
973
974                 /*
975                  * reinstate this path
976                  */
977                 if (oldstate != PATH_UP &&
978                     oldstate != PATH_GHOST)
979                         reinstate_path(pp, 1);
980                 else
981                         reinstate_path(pp, 0);
982
983                 /*
984                  * schedule [defered] failback
985                  */
986                 if (pp->mpp->pgfailback > 0)
987                         pp->mpp->failback_tick =
988                                 pp->mpp->pgfailback + 1;
989                 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
990                     need_switch_pathgroup(pp->mpp, 1))
991                         switch_pathgroup(pp->mpp);
992
993                 /*
994                  * if at least one path is up in a group, and
995                  * the group is disabled, re-enable it
996                  */
997                 if (newstate == PATH_UP)
998                         enable_group(pp);
999         }
1000         else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1001                 LOG_MSG(4, checker_message(&pp->checker));
1002                 /*
1003                  * double the next check delay.
1004                  * max at conf->max_checkint
1005                  */
1006                 if (pp->checkint < (conf->max_checkint / 2))
1007                         pp->checkint = 2 * pp->checkint;
1008                 else
1009                         pp->checkint = conf->max_checkint;
1010
1011                 pp->tick = pp->checkint;
1012                 condlog(4, "%s: delay next check %is",
1013                                 pp->dev_t, pp->tick);
1014         }
1015         else if (newstate == PATH_DOWN)
1016                 LOG_MSG(2, checker_message(&pp->checker));
1017
1018         pp->state = newstate;
1019
1020         /*
1021          * path prio refreshing
1022          */
1023         condlog(4, "path prio refresh");
1024         pathinfo(pp, conf->hwtable, DI_PRIO);
1025
1026         /*
1027          * pathgroup failback policy
1028          */
1029         if (need_switch_pathgroup(pp->mpp, 0)) {
1030                 if (pp->mpp->pgfailback > 0 &&
1031                     pp->mpp->failback_tick <= 0)
1032                         pp->mpp->failback_tick =
1033                                 pp->mpp->pgfailback + 1;
1034                 else if (pp->mpp->pgfailback ==
1035                                 -FAILBACK_IMMEDIATE)
1036                         switch_pathgroup(pp->mpp);
1037         }
1038 }
1039
1040 static void *
1041 checkerloop (void *ap)
1042 {
1043         struct vectors *vecs;
1044         struct path *pp;
1045         int count = 0;
1046         unsigned int i;
1047         sigset_t old;
1048
1049         mlockall(MCL_CURRENT | MCL_FUTURE);
1050         vecs = (struct vectors *)ap;
1051         condlog(2, "path checkers start up");
1052
1053         /*
1054          * init the path check interval
1055          */
1056         vector_foreach_slot (vecs->pathvec, pp, i) {
1057                 pp->checkint = conf->checkint;
1058         }
1059
1060         while (1) {
1061                 block_signal(SIGHUP, &old);
1062                 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1063                 lock(vecs->lock);
1064                 condlog(4, "tick");
1065
1066                 if (vecs->pathvec) {
1067                         vector_foreach_slot (vecs->pathvec, pp, i) {
1068                                 check_path(vecs, pp);
1069                         }
1070                 }
1071                 if (vecs->mpvec) {
1072                         defered_failback_tick(vecs->mpvec);
1073                         retry_count_tick(vecs->mpvec);
1074                 }
1075                 if (count)
1076                         count--;
1077                 else {
1078                         condlog(4, "map garbage collection");
1079                         mpvec_garbage_collector(vecs);
1080                         count = MAPGCINT;
1081                 }
1082
1083                 lock_cleanup_pop(vecs->lock);
1084                 pthread_sigmask(SIG_SETMASK, &old, NULL);
1085                 sleep(1);
1086         }
1087         return NULL;
1088 }
1089
1090 int
1091 configure (struct vectors * vecs, int start_waiters)
1092 {
1093         struct multipath * mpp;
1094         struct path * pp;
1095         vector mpvec;
1096         int i;
1097
1098         if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
1099                 return 1;
1100
1101         if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
1102                 return 1;
1103
1104         if (!(mpvec = vector_alloc()))
1105                 return 1;
1106
1107         /*
1108          * probe for current path (from sysfs) and map (from dm) sets
1109          */
1110         path_discovery(vecs->pathvec, conf, DI_ALL);
1111
1112         vector_foreach_slot (vecs->pathvec, pp, i){
1113                 if (filter_path(conf, pp) > 0){
1114                         vector_del_slot(vecs->pathvec, i);
1115                         free_path(pp);
1116                         i--;
1117                 }
1118                 else
1119                         pp->checkint = conf->checkint;
1120         }
1121         if (map_discovery(vecs))
1122                 return 1;
1123
1124         /*
1125          * create new set of maps & push changed ones into dm
1126          */
1127         if (coalesce_paths(vecs, mpvec, NULL, 0))
1128                 return 1;
1129
1130         /*
1131          * may need to remove some maps which are no longer relevant
1132          * e.g., due to blacklist changes in conf file
1133          */
1134         if (coalesce_maps(vecs, mpvec))
1135                 return 1;
1136
1137         dm_lib_release();
1138
1139         sync_maps_state(mpvec);
1140
1141         /*
1142          * purge dm of old maps
1143          */
1144         remove_maps(vecs);
1145
1146         /*
1147          * save new set of maps formed by considering current path state
1148          */
1149         vector_free(vecs->mpvec);
1150         vecs->mpvec = mpvec;
1151
1152         /*
1153          * start dm event waiter threads for these new maps
1154          */
1155         vector_foreach_slot(vecs->mpvec, mpp, i) {
1156                 if (setup_multipath(vecs, mpp))
1157                         return 1;
1158                 if (start_waiters)
1159                         if (start_waiter_thread(mpp, vecs))
1160                                 return 1;
1161         }
1162         return 0;
1163 }
1164
1165 int
1166 reconfigure (struct vectors * vecs)
1167 {
1168         struct config * old = conf;
1169
1170         /*
1171          * free old map and path vectors ... they use old conf state
1172          */
1173         if (VECTOR_SIZE(vecs->mpvec))
1174                 remove_maps_and_stop_waiters(vecs);
1175
1176         if (VECTOR_SIZE(vecs->pathvec))
1177                 free_pathvec(vecs->pathvec, FREE_PATHS);
1178
1179         vecs->pathvec = NULL;
1180         conf = NULL;
1181
1182         if (load_config(DEFAULT_CONFIGFILE))
1183                 return 1;
1184
1185         conf->verbosity = old->verbosity;
1186
1187         if (!conf->checkint) {
1188                 conf->checkint = DEFAULT_CHECKINT;
1189                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1190         }
1191         configure(vecs, 1);
1192         free_config(old);
1193         return 0;
1194 }
1195
1196 static struct vectors *
1197 init_vecs (void)
1198 {
1199         struct vectors * vecs;
1200
1201         vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
1202
1203         if (!vecs)
1204                 return NULL;
1205
1206         vecs->lock.mutex =
1207                 (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
1208
1209         if (!vecs->lock.mutex)
1210                 goto out;
1211
1212         pthread_mutex_init(vecs->lock.mutex, NULL);
1213         vecs->lock.depth = 0;
1214
1215         return vecs;
1216
1217 out:
1218         FREE(vecs);
1219         condlog(0, "failed to init paths");
1220         return NULL;
1221 }
1222
/*
 * Install func as the handler for signo through sigaction(), with an
 * empty signal mask and no flags.
 *
 * Returns the previously installed handler, or SIG_ERR when
 * sigaction() fails.
 */
static void *
signal_set(int signo, void (*func) (int))
{
	struct sigaction new_action;
	struct sigaction old_action;

	new_action.sa_handler = func;
	new_action.sa_flags = 0;
	sigemptyset(&new_action.sa_mask);

	if (sigaction(signo, &new_action, &old_action) < 0)
		return (SIG_ERR);

	return (old_action.sa_handler);
}
1241
1242 static void
1243 sighup (int sig)
1244 {
1245         condlog(2, "reconfigure (SIGHUP)");
1246
1247         lock(gvecs->lock);
1248         reconfigure(gvecs);
1249         unlock(gvecs->lock);
1250
1251 #ifdef _DEBUG_
1252         dbg_free_final(NULL);
1253 #endif
1254 }
1255
/*
 * Termination handler (installed for SIGINT and SIGTERM by
 * signal_init()): ask the daemon to exit with status 0.
 */
static void
sigend (int sig)
{
	(void)sig;	/* same action whatever the signal */

	exit_daemon(0);
}
1261
/*
 * SIGUSR1 handler: only logs that the signal was received.
 */
static void
sigusr1 (int sig)
{
	(void)sig;	/* the signal number is not needed */

	condlog(3, "SIGUSR1 received");
}
1267
1268 static void
1269 signal_init(void)
1270 {
1271         signal_set(SIGHUP, sighup);
1272         signal_set(SIGUSR1, sigusr1);
1273         signal_set(SIGINT, sigend);
1274         signal_set(SIGTERM, sigend);
1275         signal(SIGPIPE, SIG_IGN);
1276 }
1277
/*
 * Request SCHED_RR scheduling at priority 99 for the calling process.
 * A failure is only logged.
 */
static void
setscheduler (void)
{
	static struct sched_param rr_param = {
		.sched_priority = 99
	};

	if (sched_setscheduler (0, SCHED_RR, &rr_param) == -1)
		condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
}
1292
/*
 * Write val to /proc/self/oom_adj.
 * Nothing is done (and nothing reported) when the file cannot be
 * opened for writing.
 */
static void
set_oom_adj (int val)
{
	FILE *oom_file = fopen("/proc/self/oom_adj", "w");

	if (oom_file) {
		fprintf(oom_file, "%i", val);
		fclose(oom_file);
	}
}
1306
/*
 * Initialize a thread attribute object.
 *
 * attr      - attribute object to initialize
 * stacksize - requested stack size in bytes; raised to
 *             PTHREAD_STACK_MIN when smaller
 * detached  - when non-zero, threads created with attr are detached
 *
 * Any failure is fatal: a message is printed on stderr and the process
 * exits with status 1.
 */
void
setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
{
	int rc;

	/*
	 * the pthread_attr_* functions report failure through their
	 * return value and do not set errno, so the message must be
	 * built from that return value
	 */
	rc = pthread_attr_init(attr);
	if (rc) {
		fprintf(stderr, "can't initialize thread attr: %s\n",
			strerror(rc));
		exit(1);
	}
	if (stacksize < (size_t)PTHREAD_STACK_MIN)
		stacksize = (size_t)PTHREAD_STACK_MIN;

	rc = pthread_attr_setstacksize(attr, stacksize);
	if (rc) {
		fprintf(stderr, "can't set thread stack size to %lu: %s\n",
			(unsigned long)stacksize, strerror(rc));
		exit(1);
	}
	if (detached) {
		rc = pthread_attr_setdetachstate(attr,
						 PTHREAD_CREATE_DETACHED);
		if (rc) {
			fprintf(stderr, "can't set thread to detached: %s\n",
				strerror(rc));
			exit(1);
		}
	}
}
1330
1331 static int
1332 child (void * param)
1333 {
1334         pthread_t check_thr, uevent_thr, uxlsnr_thr;
1335         pthread_attr_t log_attr, misc_attr;
1336         struct vectors * vecs;
1337
1338         mlockall(MCL_CURRENT | MCL_FUTURE);
1339
1340         setup_thread_attr(&misc_attr, 64 * 1024, 1);
1341         setup_thread_attr(&waiter_attr, 32 * 1024, 1);
1342
1343         if (logsink) {
1344                 setup_thread_attr(&log_attr, 64 * 1024, 0);
1345                 log_thread_start(&log_attr);
1346                 pthread_attr_destroy(&log_attr);
1347         }
1348
1349         condlog(2, "--------start up--------");
1350         condlog(2, "read " DEFAULT_CONFIGFILE);
1351
1352         if (load_config(DEFAULT_CONFIGFILE))
1353                 exit(1);
1354
1355         if (init_checkers()) {
1356                 condlog(0, "failed to initialize checkers");
1357                 exit(1);
1358         }
1359         if (init_prio()) {
1360                 condlog(0, "failed to initialize prioritizers");
1361                 exit(1);
1362         }
1363
1364         setlogmask(LOG_UPTO(conf->verbosity + 3));
1365
1366         /*
1367          * fill the voids left in the config file
1368          */
1369         if (!conf->checkint) {
1370                 conf->checkint = DEFAULT_CHECKINT;
1371                 conf->max_checkint = MAX_CHECKINT(conf->checkint);
1372         }
1373
1374         if (conf->max_fds) {
1375                 struct rlimit fd_limit;
1376
1377                 fd_limit.rlim_cur = conf->max_fds;
1378                 fd_limit.rlim_max = conf->max_fds;
1379                 if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0)
1380                         condlog(0, "can't set open fds limit to %d : %s\n",
1381                                 conf->max_fds, strerror(errno));
1382         }
1383
1384         if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
1385                 if (logsink)
1386                         log_thread_stop();
1387
1388                 exit(1);
1389         }
1390         signal_init();
1391         setscheduler();
1392         set_oom_adj(-16);
1393         vecs = gvecs = init_vecs();
1394
1395         if (!vecs)
1396                 exit(1);
1397
1398         if (sysfs_init(conf->sysfs_dir, FILE_NAME_SIZE)) {
1399                 condlog(0, "can not find sysfs mount point");
1400                 exit(1);
1401         }
1402         conf->daemon = 1;
1403         /*
1404          * fetch and configure both paths and multipaths
1405          */
1406         if (configure(vecs, 1)) {
1407                 condlog(0, "failure during configuration");
1408                 exit(1);
1409         }
1410         /*
1411          * start threads
1412          */
1413         pthread_create(&check_thr, &misc_attr, checkerloop, vecs);
1414         pthread_create(&uevent_thr, &misc_attr, ueventloop, vecs);
1415         pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
1416         pthread_attr_destroy(&misc_attr);
1417
1418         pthread_cond_wait(&exit_cond, &exit_mutex);
1419
1420         /*
1421          * exit path
1422          */
1423         block_signal(SIGHUP, NULL);
1424         lock(vecs->lock);
1425         remove_maps_and_stop_waiters(vecs);
1426         free_pathvec(vecs->pathvec, FREE_PATHS);
1427
1428         pthread_cancel(check_thr);
1429         pthread_cancel(uevent_thr);
1430         pthread_cancel(uxlsnr_thr);
1431
1432         sysfs_cleanup();
1433
1434         free_keys(keys);
1435         keys = NULL;
1436         free_handlers(handlers);
1437         handlers = NULL;
1438         free_polls();
1439
1440         unlock(vecs->lock);
1441         /* Now all the waitevent threads will start rushing in. */
1442         while (vecs->lock.depth > 0) {
1443                 sleep (1); /* This is weak. */
1444                 condlog(3,"Have %d wait event checkers threads to de-alloc, waiting..\n", vecs->lock.depth);
1445         }
1446         pthread_mutex_destroy(vecs->lock.mutex);
1447         FREE(vecs->lock.mutex);
1448         vecs->lock.depth = 0;
1449         vecs->lock.mutex = NULL;
1450         FREE(vecs);
1451         vecs = NULL;
1452
1453         condlog(2, "--------shut down-------");
1454
1455         if (logsink)
1456                 log_thread_stop();
1457
1458         dm_lib_release();
1459         dm_lib_exit();
1460
1461         cleanup_prio();
1462         cleanup_checkers();
1463         /*
1464          * Freeing config must be done after condlog() and dm_lib_exit(),
1465          * because logging functions like dlog() and dm_write_log()
1466          * reference the config.
1467          */
1468         free_config(conf);
1469         conf = NULL;
1470
1471 #ifdef _DEBUG_
1472         dbg_free_final(NULL);
1473 #endif
1474
1475         exit(0);
1476 }
1477
/*
 * Detach from the controlling terminal with the usual double fork.
 *
 * Returns
 *   -1   when the first fork fails,
 *   pid  (> 0) in the original process, which is expected to exit,
 *   0    in the daemon process, with stdin reading from /dev/null,
 *        stdout and stderr writing to /dev/console and "/" as the
 *        working directory.
 *
 * The intermediate process exits with _exit(0). A failure of the second
 * fork is only reported. The daemon process also calls _exit(0) when
 * /dev/null or /dev/console cannot be opened.
 */
static int
daemonize(void)
{
	int pid;
	int in_fd, out_fd;

	if( (pid = fork()) < 0){
		fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
		return -1;
	}
	else if (pid != 0)
		return pid;

	setsid();

	if ( (pid = fork()) < 0)
		fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
	else if (pid != 0)
		_exit(0);

	in_fd = open("/dev/null", O_RDONLY);
	if (in_fd < 0){
		fprintf(stderr, "cannot open /dev/null for input : %s\n",
			strerror(errno));
		_exit(0);
	}
	out_fd = open("/dev/console", O_WRONLY);
	if (out_fd < 0){
		fprintf(stderr, "cannot open /dev/console for output : %s\n",
			strerror(errno));
		_exit(0);
	}

	/*
	 * dup2() targets the wanted descriptor explicitly instead of
	 * relying on close()+dup() handing back the lowest free one,
	 * which goes wrong when a standard stream was already closed
	 * at startup. stdin is redirected first so that in_fd is
	 * consumed before its slot can be overwritten.
	 */
	if (dup2(in_fd, STDIN_FILENO) < 0 ||
	    dup2(out_fd, STDOUT_FILENO) < 0 ||
	    dup2(out_fd, STDERR_FILENO) < 0)
		fprintf(stderr, "cannot redirect standard streams : %s\n",
			strerror(errno));

	/*
	 * open() may have returned one of the standard descriptors;
	 * in that case it now IS a standard stream and must stay open
	 */
	if (in_fd > STDERR_FILENO)
		close(in_fd);
	if (out_fd > STDERR_FILENO)
		close(out_fd);

	if (chdir("/") < 0)
		fprintf(stderr, "cannot chdir to '/', continuing\n");

	return 0;
}
1525
1526 int
1527 main (int argc, char *argv[])
1528 {
1529         extern char *optarg;
1530         extern int optind;
1531         int arg;
1532         int err;
1533
1534         logsink = 1;
1535         dm_init();
1536
1537         if (getuid() != 0) {
1538                 fprintf(stderr, "need to be root\n");
1539                 exit(1);
1540         }
1541
1542         /* make sure we don't lock any path */
1543         chdir("/");
1544         umask(umask(077) | 022);
1545
1546         conf = alloc_config();
1547
1548         if (!conf)
1549                 exit(1);
1550
1551         while ((arg = getopt(argc, argv, ":dv:k::")) != EOF ) {
1552         switch(arg) {
1553                 case 'd':
1554                         logsink = 0;
1555                         //debug=1; /* ### comment me out ### */
1556                         break;
1557                 case 'v':
1558                         if (sizeof(optarg) > sizeof(char *) ||
1559                             !isdigit(optarg[0]))
1560                                 exit(1);
1561
1562                         conf->verbosity = atoi(optarg);
1563                         break;
1564                 case 'k':
1565                         uxclnt(optarg);
1566                         exit(0);
1567                 default:
1568                         ;
1569                 }
1570         }
1571         if (optind < argc) {
1572                 char cmd[CMDSIZE];
1573                 char * s = cmd;
1574                 char * c = s;
1575
1576                 while (optind < argc) {
1577                         if (strchr(argv[optind], ' '))
1578                                 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
1579                         else
1580                                 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
1581                         optind++;
1582                 }
1583                 c += snprintf(c, s + CMDSIZE - c, "\n");
1584                 uxclnt(s);
1585                 exit(0);
1586         }
1587
1588         if (!logsink)
1589                 err = 0;
1590         else
1591                 err = daemonize();
1592
1593         if (err < 0)
1594                 /* error */
1595                 exit(1);
1596         else if (err > 0)
1597                 /* parent dies */
1598                 exit(0);
1599         else
1600                 /* child lives */
1601                 return (child(NULL));
1602 }
1603