From 8a9de22b049f332a574eead7fa738678397f47fc Mon Sep 17 00:00:00 2001 From: Christophe Varoqui Date: Fri, 15 Jun 2007 01:12:40 +0200 Subject: [PATCH] [libcheckers] async path checking in the framework The directio path checker was recently moved to the aio API, but the behaviour is still synchronous: io_getevents() blocks until the (long) timeout expires. Truly asynchronous behaviour requires us to o lower the io_getevents timeout to a minimum o treat a new "pending" checker return status in the daemon (reschedule the checker early for a new io_getevents until the "long" timeout expires) This patch explores this. This approach has the nice effect of behaving well with still-synchronous checkers. The daemon should see no regression. But multipath, which assumes synchronous path checking, is for now broken ... to be repaired. Please comment abundantly on this approach, before I start moving ahead. --- libcheckers/checkers.h | 7 ++++++- libcheckers/directio.c | 28 +++++++++++++++++++--------- multipathd/main.c | 9 ++++++++- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/libcheckers/checkers.h b/libcheckers/checkers.h index d4dad9c..b0e2a62 100644 --- a/libcheckers/checkers.h +++ b/libcheckers/checkers.h @@ -40,6 +40,9 @@ * The path needs an initialization command to be sent to it in order for * I/Os to succeed. * + * PATH_PENDING: + * - Use: All async checkers + * - Description: Indicates a check IO is in flight. */ #define PATH_WILD -1 #define PATH_UNCHECKED 0 @@ -47,6 +50,7 @@ #define PATH_UP 2 #define PATH_SHAKY 3 #define PATH_GHOST 4 +#define PATH_PENDING 5 #define DIRECTIO "directio" #define TUR "tur" @@ -70,7 +74,8 @@ * Provision a long timeout. Longer than any real-world application would cope * with. 
*/ -#define DEF_TIMEOUT 300000 +#define DEF_TIMEOUT 300000 +#define ASYNC_TIMEOUT_SEC 30 /* * strings lengths diff --git a/libcheckers/directio.c b/libcheckers/directio.c index 91ed758..db19881 100644 --- a/libcheckers/directio.c +++ b/libcheckers/directio.c @@ -22,6 +22,9 @@ #define MSG_DIRECTIO_UNKNOWN "directio checker is not available" #define MSG_DIRECTIO_UP "directio checker reports path is up" #define MSG_DIRECTIO_DOWN "directio checker reports path is down" +#define MSG_DIRECTIO_PENDING "directio checker is waiting on aio" + +#define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt, ##args) struct directio_context { int running; @@ -117,36 +120,40 @@ void directio_free (struct checker * c) static int check_state(int fd, struct directio_context *ct) { - struct timespec timeout = { .tv_sec = 2 }; + struct timespec timeout = { .tv_nsec = 5 }; struct io_event event; struct stat sb; int rc = PATH_UNCHECKED; long r; if (fstat(fd, &sb) == 0) { - condlog(4, "directio: called for %x", (unsigned) sb.st_rdev); + LOG(4, "called for %x", (unsigned) sb.st_rdev); } if (!ct->running) { struct iocb *ios[1] = { &ct->io }; - condlog(3, "directio: starting new request"); + LOG(3, "starting new request"); memset(&ct->io, 0, sizeof(struct iocb)); io_prep_pread(&ct->io, fd, ct->ptr, ct->blksize, 0); if (io_submit(ct->ioctx, 1, ios) != 1) { - condlog(3, "directio: io_submit error %i", errno); + LOG(3, "io_submit error %i", errno); return PATH_UNCHECKED; } } - ct->running = 1; + ct->running++; r = io_getevents(ct->ioctx, 1L, 1L, &event, &timeout); + LOG(3, "async io getevents returns %li (errno=%s)", r, strerror(errno)); + if (r < 1L) { - condlog(3, "directio: timeout r=%li errno=%i", r, errno); - rc = PATH_DOWN; + if (ct->running > ASYNC_TIMEOUT_SEC) { + LOG(3, "abort check on timeout"); + rc = PATH_DOWN; + } else + rc = PATH_PENDING; } else { - condlog(3, "directio: io finished %lu/%lu", event.res, - event.res2); + LOG(3, "io finished %lu/%lu", event.res, event.res2); 
ct->running = 0; rc = (event.res == ct->blksize) ? PATH_UP : PATH_DOWN; } @@ -175,6 +182,9 @@ int directio (struct checker * c) case PATH_UP: MSG(c, MSG_DIRECTIO_UP); break; + case PATH_PENDING: + MSG(c, MSG_DIRECTIO_PENDING); + break; default: break; } diff --git a/multipathd/main.c b/multipathd/main.c index c432331..a574bee 100644 --- a/multipathd/main.c +++ b/multipathd/main.c @@ -900,7 +900,14 @@ checkerloop (void *ap) pathinfo(pp, conf->hwtable, 0); continue; } - + /* + * Async IO in flight. Keep the previous path state + * and reschedule as soon as possible + */ + if (newstate == PATH_PENDING) { + pp->tick = 1; + continue; + } if (newstate != pp->state) { int oldstate = pp->state; pp->state = newstate; -- 2.7.4