2 * Copyright (c) 2005 Hannes Reinecke, Suse
12 #include <sys/ioctl.h>
18 #include "../libmultipath/debug.h"
19 #include "../libmultipath/time-util.h"
21 #define AIO_GROUP_SIZE 1024
23 /* Note: This checker type relies on the fact that only one checker can be run
24 * at a time, since multiple checkers share the same aio_group, and must be
25 * able to modify other checkers' async_reqs. If multiple checkers become able
26 * to be run at the same time, this checker will need to add locking, and
27 * probably polling on event fds, to deal with that */
/* NOTE(review): the opening lines of the definitions below (the struct
 * headers, several members, and the enum tag) are outside the visible
 * chunk; all comments here are hedged accordingly. */
/* list linkage on the global aio_grp_list (presumably struct aio_group) */
30 struct list_head node;
/* requests whose checker was freed while their I/O was still in flight;
 * populated by libcheck_free(), drained by get_events()/remove_aio_group() */
33 struct list_head orphans;
/* list linkage on an aio_group's orphans list (presumably struct async_req) */
40 struct list_head node;
41 int state; /* PATH_REMOVED means this is an orphan */
/* global registry of aio_groups shared by all directio checkers */
44 static LIST_HEAD(aio_grp_list);
/* checker-private message ids, offset from the generic CHECKER_FIRST_MSGID */
47 MSG_DIRECTIO_UNKNOWN = CHECKER_FIRST_MSGID,
49 MSG_DIRECTIO_BLOCKSIZE,
/* Map a MSG_DIRECTIO_* id to its zero-based index in libcheck_msgtable. */
52 #define _IDX(x) (MSG_DIRECTIO_##x - CHECKER_FIRST_MSGID)
/* Message table consumed by the checker framework: each entry is the
 * text reported for the corresponding c->msgid set by this checker.
 * NOTE(review): the array's closing brace is outside the visible chunk. */
53 const char *libcheck_msgtable[] = {
54 [_IDX(UNKNOWN)] = " is not available",
55 [_IDX(PENDING)] = " is waiting on aio",
56 [_IDX(BLOCKSIZE)] = " cannot get blocksize, set default",
/* Logging helper: routes through condlog() with a "directio: " prefix
 * so all messages from this checker are identifiable. */
60 #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt, ##args)
/* Per-path checker state: which shared aio_group this checker draws
 * completion events from, and its single outstanding async request.
 * NOTE(review): further members (e.g. running, reset_flags, used later
 * in this file) and the closing brace are outside the visible chunk. */
62 struct directio_context {
65 struct aio_group *aio_grp;
66 struct async_req *req;
/*
 * Allocate, zero, and register a new aio_group on aio_grp_list, setting
 * up a kernel AIO context sized for AIO_GROUP_SIZE concurrent requests.
 * NOTE(review): the function-name line and parts of the error path are
 * outside the visible chunk; the name is inferred from the call site in
 * set_aio_group().
 */
69 static struct aio_group *
72 struct aio_group *aio_grp;
74 aio_grp = malloc(sizeof(struct aio_group));
77 memset(aio_grp, 0, sizeof(struct aio_group));
78 INIT_LIST_HEAD(&aio_grp->orphans);
/* io_setup() typically fails with EAGAIN when the system-wide
 * fs.aio-max-nr limit is exhausted -- hence the hint below */
80 if (io_setup(AIO_GROUP_SIZE, &aio_grp->ioctx) != 0) {
81 LOG(1, "io_setup failed");
83 LOG(1, "global number of io events too small. Increase fs.aio-max-nr with sysctl");
87 list_add(&aio_grp->node, &aio_grp_list);
/*
 * Attach ct to an aio_group that still has a free slot, creating a new
 * group when every existing one already has AIO_GROUP_SIZE holders.
 * NOTE(review): the lines between group selection and the final
 * assignment (NULL check, holders accounting, return value) are outside
 * the visible chunk.
 */
92 set_aio_group(struct directio_context *ct)
94 struct aio_group *aio_grp = NULL;
/* reuse the first registered group with spare capacity */
96 list_for_each_entry(aio_grp, &aio_grp_list, node)
97 if (aio_grp->holders < AIO_GROUP_SIZE)
/* all groups full (or list empty): create a fresh one */
99 aio_grp = add_aio_group();
106 ct->aio_grp = aio_grp;
/*
 * Tear down an aio_group: destroy its kernel AIO context, release any
 * orphaned requests still queued on it, and unlink the group from
 * aio_grp_list. NOTE(review): the lines that free each orphan's buffer
 * and the group object itself are outside the visible chunk -- confirm
 * they exist in the full file.
 */
111 remove_aio_group(struct aio_group *aio_grp)
113 struct async_req *req, *tmp;
/* io_destroy() also attempts to cancel outstanding requests on the
 * context, per the libaio contract */
115 io_destroy(aio_grp->ioctx);
116 list_for_each_entry_safe(req, tmp, &aio_grp->orphans, node) {
117 list_del(&req->node);
121 list_del(&aio_grp->node);
125 /* If an aio_group is completely full of orphans, then no checkers can
126 * use it, which means that no checkers can clear out the orphans. To
127 * avoid keeping the useless group around, simply remove the
130 check_orphaned_group(struct aio_group *aio_grp)
133 struct list_head *item;
/* a group with spare capacity can still acquire a checker that would
 * eventually reap the orphans -- keep it */
135 if (aio_grp->holders < AIO_GROUP_SIZE)
/* count the queued orphans; NOTE(review): the loop body (presumably
 * count++) is outside the visible chunk */
137 list_for_each(item, &aio_grp->orphans)
139 if (count >= AIO_GROUP_SIZE)
140 remove_aio_group(aio_grp);
/* Checker-API hook: release every registered aio_group (and any orphans
 * queued on them) when the checker library is reset/unloaded. */
143 void libcheck_reset (void)
145 struct aio_group *aio_grp, *tmp;
/* _safe variant: remove_aio_group() unlinks the current node */
147 list_for_each_entry_safe(aio_grp, tmp, &aio_grp_list, node)
148 remove_aio_group(aio_grp);
/*
 * Checker-API hook: allocate the per-path directio context, join an
 * aio_group, and prepare a suitably aligned read buffer for async I/O
 * on c->fd. On success the context is stored in c->context.
 * NOTE(review): the error-handling paths (several lines outside the
 * visible chunk) appear to unwind partial setup and decrement the
 * group's holder count -- confirm against the full file.
 */
151 int libcheck_init (struct checker * c)
153 unsigned long pgsize = getpagesize();
154 struct directio_context * ct;
155 struct async_req *req = NULL;
158 ct = malloc(sizeof(struct directio_context));
161 memset(ct, 0, sizeof(struct directio_context));
163 if (set_aio_group(ct) < 0)
166 req = malloc(sizeof(struct async_req));
170 memset(req, 0, sizeof(struct async_req));
171 INIT_LIST_HEAD(&req->node);
/* query the device's block size; on failure report it and (per the
 * message text) fall back to a default set outside the visible chunk */
173 if (ioctl(c->fd, BLKBSZGET, &req->blksize) < 0) {
174 c->msgid = MSG_DIRECTIO_BLOCKSIZE;
177 if (req->blksize > 4096) {
179 * Sanity check for DASD; BSZGET is broken
/* O_DIRECT I/O needs an aligned buffer; align to the page size */
186 if (posix_memalign((void **)&req->buf, pgsize, req->blksize) != 0)
189 flags = fcntl(c->fd, F_GETFL);
/* ensure O_DIRECT is set on the fd; NOTE(review): presumably
 * ct->reset_flags is recorded here so libcheck_free() can restore the
 * original flags -- the assignment is outside the visible chunk */
192 if (!(flags & O_DIRECT)) {
194 if (fcntl(c->fd, F_SETFL, flags) < 0)
199 /* Successfully initialized, return the context. */
201 c->context = (void *) ct;
/* error path: give the slot back to the shared group */
211 ct->aio_grp->holders--;
/*
 * Checker-API hook: release this path's checker context. Restores the
 * fd's original flags if init changed them, then either frees the
 * request (when no I/O is pending, or io_cancel() succeeded) or parks
 * it on the group's orphan list so a later get_events() pass can reap
 * its completion. NOTE(review): several guard/free lines are outside
 * the visible chunk.
 */
216 void libcheck_free (struct checker * c)
218 struct directio_context * ct = (struct directio_context *)c->context;
219 struct io_event event;
/* undo the O_DIRECT flag change made at init time */
225 if (ct->reset_flags) {
226 if ((flags = fcntl(c->fd, F_GETFL)) >= 0) {
227 int ret __attribute__ ((unused));
230 /* No point in checking for errors */
231 ret = fcntl(c->fd, F_SETFL, flags);
/* safe to free immediately only if nothing is in flight or the pending
 * request could be cancelled */
236 (ct->req->state != PATH_PENDING ||
237 io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event) == 0))
242 ct->aio_grp->holders--;
/* still in flight: orphan the request so its completion is reaped later */
244 ct->req->state = PATH_REMOVED;
245 list_add(&ct->req->node, &ct->aio_grp->orphans);
246 check_orphaned_group(ct->aio_grp);
/*
 * Reap completed I/O events for aio_grp: block up to *timeout for the
 * first io_getevents() batch, then poll (zero timeout) while full
 * 128-event batches keep arriving. Completed orphans are unlinked (and,
 * per the pattern, freed outside the visible chunk); live requests get
 * PATH_UP on a full-sized read, otherwise a failure state whose exact
 * value is outside the visible chunk. Returns whether any event was
 * reaped (got_events).
 */
254 get_events(struct aio_group *aio_grp, struct timespec *timeout)
256 struct io_event events[128];
257 int i, nr, got_events = 0;
258 struct timespec zero_timeout = { .tv_sec = 0, };
259 struct timespec *timep = timeout;
263 nr = io_getevents(aio_grp->ioctx, 1, 128, events, timep);
264 got_events |= (nr > 0);
266 for (i = 0; i < nr; i++) {
/* map the completed iocb back to its owning async_req */
267 struct async_req *req = container_of(events[i].obj, struct async_req, io);
269 LOG(3, "io finished %lu/%lu", events[i].res,
272 /* got an orphaned request */
273 if (req->state == PATH_REMOVED) {
274 list_del(&req->node);
/* a short read counts as failure: a healthy path returns a full block */
279 req->state = (events[i].res == req->blksize) ?
/* only the first io_getevents() call may block; poll afterwards */
282 timep = &zero_timeout;
283 } while (nr == 128); /* assume there are more events and try again */
286 LOG(3, "async io getevents returned %i (errno=%s)",
287 nr, strerror(errno));
/*
 * Core state machine: submit (or continue waiting on) the async read
 * for fd and translate its outcome into a PATH_* state. In sync mode
 * the wait budget is timeout_secs; in async mode it is a tiny (1us)
 * poll, with long-pending requests eventually cancelled below.
 * NOTE(review): many connective lines (loop headers, braces, some
 * returns) are outside the visible chunk, so control flow is partially
 * inferred.
 */
293 check_state(int fd, struct directio_context *ct, int sync, int timeout_secs)
295 struct timespec timeout = { .tv_nsec = 1000 };
299 struct timespec currtime, endtime;
301 if (fstat(fd, &sb) == 0) {
302 LOG(4, "called for %x", (unsigned) sb.st_rdev);
/* sync mode: wait up to the caller's full timeout instead of ~1us */
305 LOG(4, "called in synchronous mode");
306 timeout.tv_sec = timeout_secs;
/* a previously submitted request has already completed: report it */
311 if (ct->req->state != PATH_PENDING) {
313 return ct->req->state;
316 struct iocb *ios[1] = { &ct->req->io };
318 LOG(3, "starting new request");
319 memset(&ct->req->io, 0, sizeof(struct iocb));
/* read one block from offset 0 into the aligned buffer */
320 io_prep_pread(&ct->req->io, fd, ct->req->buf,
321 ct->req->blksize, 0);
322 ct->req->state = PATH_PENDING;
323 if (io_submit(ct->aio_grp->ioctx, 1, ios) != 1) {
/* NOTE(review): libaio's io_submit() returns -errno rather than
 * setting errno, so logging errno here may be misleading -- verify */
324 LOG(3, "io_submit error %i", errno);
325 return PATH_UNCHECKED;
/* compute the absolute deadline for the wait loop below */
330 get_monotonic_time(&endtime);
331 endtime.tv_sec += timeout.tv_sec;
332 endtime.tv_nsec += timeout.tv_nsec;
333 normalize_timespec(&endtime);
335 r = get_events(ct->aio_grp, &timeout);
337 if (ct->req->state != PATH_PENDING) {
339 return ct->req->state;
341 (timeout.tv_sec == 0 && timeout.tv_nsec == 0))
/* recompute the remaining budget; clamp at zero past the deadline */
344 get_monotonic_time(&currtime);
345 timespecsub(&endtime, &currtime, &timeout);
346 if (timeout.tv_sec < 0)
347 timeout.tv_sec = timeout.tv_nsec = 0;
/* request has outlived the checker timeout (or sync mode): cancel it */
349 if (ct->running > timeout_secs || sync) {
350 struct io_event event;
352 LOG(3, "abort check on timeout");
354 r = io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
356 * Only reset ct->running if we really
357 * could abort the pending I/O
/* still within budget: report the request as pending */
363 LOG(3, "async io pending");
/*
 * Checker-API entry point: run one directio check pass for path c.
 * Delegates to check_state() and maps the resulting PATH_* value onto
 * the checker message ids reported to the user. NOTE(review): this
 * function continues past the end of the visible chunk (the switch
 * scaffolding and final return are not shown).
 */
370 int libcheck_check (struct checker * c)
373 struct directio_context * ct = (struct directio_context *)c->context;
/* no context (init failed): the path cannot be checked at all */
376 return PATH_UNCHECKED;
378 ret = check_state(c->fd, ct, checker_is_sync(c), c->timeout);
383 c->msgid = MSG_DIRECTIO_UNKNOWN;
386 c->msgid = CHECKER_MSGID_DOWN;
389 c->msgid = CHECKER_MSGID_UP;
392 c->msgid = MSG_DIRECTIO_PENDING;