/*
 * Copyright (c) 2005 Hannes Reinecke, Suse
 */

#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <libaio.h>

#include "checkers.h"
#include "debug.h"
#include "time-util.h"

#define AIO_GROUP_SIZE 1024

/* Note: This checker type relies on the fact that only one checker can be
 * run at a time, since multiple checkers share the same aio_group and must
 * be able to modify other checkers' async_reqs. If multiple checkers become
 * able to run at the same time, this checker will need to add locking, and
 * probably polling on event fds, to deal with that. */

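/*
 * Overview (summary of the code below): an aio_group wraps one io_context_t
 * created with io_setup() and is shared by up to AIO_GROUP_SIZE checkers
 * ("holders"). Each checker owns an async_req describing its outstanding
 * read. If a checker is freed while its read is still in flight, the
 * async_req is parked on the group's orphans list and reclaimed once its
 * completion event is eventually reaped.
 */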
struct aio_group {
        struct list_head node;
        int holders;
        io_context_t ioctx;
        struct list_head orphans;
};

struct async_req {
        struct iocb io;
        unsigned int blksize;
        unsigned char *buf;
        struct list_head node;
        int state; /* PATH_REMOVED means this is an orphan */
};

static LIST_HEAD(aio_grp_list);

enum {
        MSG_DIRECTIO_UNKNOWN = CHECKER_FIRST_MSGID,
        MSG_DIRECTIO_PENDING,
        MSG_DIRECTIO_BLOCKSIZE,
};

#define _IDX(x) (MSG_DIRECTIO_##x - CHECKER_FIRST_MSGID)
const char *libcheck_msgtable[] = {
        [_IDX(UNKNOWN)] = " is not available",
        [_IDX(PENDING)] = " is waiting on aio",
        [_IDX(BLOCKSIZE)] = " cannot get blocksize, set default",
};

#define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt, ##args)

struct directio_context {
        unsigned int running;
        int reset_flags;
        struct aio_group *aio_grp;
        struct async_req *req;
};

static struct aio_group *
add_aio_group(void)
{
        struct aio_group *aio_grp;
        int rc;

        aio_grp = malloc(sizeof(struct aio_group));
        if (!aio_grp)
                return NULL;
        memset(aio_grp, 0, sizeof(struct aio_group));
        INIT_LIST_HEAD(&aio_grp->orphans);

        if ((rc = io_setup(AIO_GROUP_SIZE, &aio_grp->ioctx)) != 0) {
                LOG(1, "io_setup failed");
                if (rc == -EAGAIN)
                        LOG(1, "global number of io events too small. Increase fs.aio-max-nr with sysctl");
                free(aio_grp);
                return NULL;
        }
        list_add(&aio_grp->node, &aio_grp_list);
        return aio_grp;
}

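/*
 * Attach a checker context to an aio_group: reuse the first group that
 * still has a free slot, otherwise create a new one with add_aio_group().
 * The holders count tracks how many checker contexts share the group.
 */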
static int
set_aio_group(struct directio_context *ct)
{
        struct aio_group *aio_grp = NULL;

        list_for_each_entry(aio_grp, &aio_grp_list, node)
                if (aio_grp->holders < AIO_GROUP_SIZE)
                        goto found;
        aio_grp = add_aio_group();
        if (!aio_grp)
                return -1;
found:
        aio_grp->holders++;
        ct->aio_grp = aio_grp;
        return 0;
}

static void
remove_aio_group(struct aio_group *aio_grp)
{
        struct async_req *req, *tmp;

        io_destroy(aio_grp->ioctx);
        list_for_each_entry_safe(req, tmp, &aio_grp->orphans, node) {
                list_del(&req->node);
                free(req->buf);
                free(req);
        }
        list_del(&aio_grp->node);
        free(aio_grp);
}

/* If an aio_group is completely full of orphans, then no checkers can
 * use it, which means that no checkers can clear out the orphans. To
 * avoid keeping the useless group around, simply remove the group. */
static void
check_orphaned_group(struct aio_group *aio_grp)
{
        int count = 0;
        struct list_head *item;

        if (aio_grp->holders < AIO_GROUP_SIZE)
                return;
        list_for_each(item, &aio_grp->orphans)
                count++;
        if (count >= AIO_GROUP_SIZE)
                remove_aio_group(aio_grp);
}

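/*
 * Tear down every aio_group, including any orphaned requests still attached
 * to them. Presumably called by the checker framework when the checker
 * class is unloaded, so nothing can still be using the io contexts here.
 */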
void libcheck_reset (void)
{
        struct aio_group *aio_grp, *tmp;

        list_for_each_entry_safe(aio_grp, tmp, &aio_grp_list, node)
                remove_aio_group(aio_grp);
}

int libcheck_init (struct checker * c)
{
        unsigned long pgsize = getpagesize();
        struct directio_context * ct;
        struct async_req *req = NULL;
        long flags;

        ct = malloc(sizeof(struct directio_context));
        if (!ct)
                return 1;
        memset(ct, 0, sizeof(struct directio_context));

        if (set_aio_group(ct) < 0)
                goto out;

        req = malloc(sizeof(struct async_req));
        if (!req)
                goto out;
        memset(req, 0, sizeof(struct async_req));
        INIT_LIST_HEAD(&req->node);

        if (ioctl(c->fd, BLKBSZGET, &req->blksize) < 0) {
                c->msgid = MSG_DIRECTIO_BLOCKSIZE;
                req->blksize = 512;
        }
        if (req->blksize > 4096) {
                /*
                 * Sanity check for DASD; BSZGET is broken
                 */
                req->blksize = 4096;
        }
        if (!req->blksize)
                goto out;

        if (posix_memalign((void **)&req->buf, pgsize, req->blksize) != 0)
                goto out;

        flags = fcntl(c->fd, F_GETFL);
        if (flags < 0)
                goto out;
        if (!(flags & O_DIRECT)) {
                flags |= O_DIRECT;
                if (fcntl(c->fd, F_SETFL, flags) < 0)
                        goto out;
                ct->reset_flags = 1;
        }

        /* Successfully initialized, return the context. */
        ct->req = req;
        c->context = (void *) ct;
        return 0;

out:
        if (req) {
                if (req->buf)
                        free(req->buf);
                free(req);
        }
        if (ct->aio_grp)
                ct->aio_grp->holders--;
        free(ct);
        return 1;
}

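/*
 * Release a checker context. If an async read is still pending, the request
 * cannot simply be freed, since its completion would then touch freed
 * memory. Instead it is marked PATH_REMOVED and parked on the aio_group's
 * orphans list, to be reclaimed by whichever checker reaps its completion
 * event later (see get_events() below).
 */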
void libcheck_free (struct checker * c)
{
        struct directio_context * ct = (struct directio_context *)c->context;
        struct io_event event;
        long flags;

        if (!ct)
                return;

        if (ct->reset_flags) {
                if ((flags = fcntl(c->fd, F_GETFL)) >= 0) {
                        int ret __attribute__ ((unused));

                        flags &= ~O_DIRECT;
                        /* No point in checking for errors */
                        ret = fcntl(c->fd, F_SETFL, flags);
                }
        }

        if (ct->running && ct->req->state != PATH_PENDING)
                ct->running = 0;
        if (!ct->running) {
                free(ct->req->buf);
                free(ct->req);
                ct->aio_grp->holders--;
        } else {
                /* Currently a no-op */
                io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
                ct->req->state = PATH_REMOVED;
                list_add(&ct->req->node, &ct->aio_grp->orphans);
                check_orphaned_group(ct->aio_grp);
        }
        free(ct);
        c->context = NULL;
}

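/*
 * Reap completed aio events for this group. Completions that belong to
 * orphaned requests free those requests here; live requests have their
 * state set to PATH_UP or PATH_DOWN depending on whether the full blksize
 * was read. Only the first io_getevents() call uses the caller's timeout;
 * follow-up calls poll with a zero timeout.
 */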
static int
get_events(struct aio_group *aio_grp, struct timespec *timeout)
{
        struct io_event events[128];
        int i, nr, got_events = 0;
        struct timespec zero_timeout = { .tv_sec = 0, };
        struct timespec *timep = timeout;

        do {
                nr = io_getevents(aio_grp->ioctx, 1, 128, events, timep);
                got_events |= (nr > 0);

                for (i = 0; i < nr; i++) {
                        struct async_req *req = container_of(events[i].obj, struct async_req, io);

                        LOG(4, "io finished %lu/%lu", events[i].res,
                            events[i].res2);

                        /* got an orphaned request */
                        if (req->state == PATH_REMOVED) {
                                list_del(&req->node);
                                free(req->buf);
                                free(req);
                        } else
                                req->state = (events[i].res == req->blksize) ?
                                              PATH_UP : PATH_DOWN;
                }
                timep = &zero_timeout;
        } while (nr == 128); /* assume there are more events and try again */

        if (nr < 0)
                LOG(4, "async io getevents returned %s", strerror(-nr));

        return got_events;
}

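/*
 * Submit the checker's read if none is in flight, then wait for it to
 * complete. In sync mode the wait uses the full timeout and a request that
 * is still outstanding afterwards (or has been pending for more than
 * timeout_secs checks) is cancelled and reported as PATH_DOWN; in async
 * mode the wait is a near-zero poll and PATH_PENDING is returned if the
 * read has not finished yet.
 */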
static int
check_state(int fd, struct directio_context *ct, int sync, int timeout_secs)
{
        struct timespec timeout = { .tv_nsec = 1000 };
        struct stat sb;
        int rc, r;
        struct timespec currtime, endtime;

        if (fstat(fd, &sb) == 0) {
                LOG(4, "called for %x", (unsigned) sb.st_rdev);
        }
        if (sync > 0) {
                LOG(4, "called in synchronous mode");
                timeout.tv_sec = timeout_secs;
                timeout.tv_nsec = 0;
        }

        if (ct->running) {
                if (ct->req->state != PATH_PENDING) {
                        ct->running = 0;
                        return ct->req->state;
                }
        } else {
                struct iocb *ios[1] = { &ct->req->io };

                LOG(4, "starting new request");
                memset(&ct->req->io, 0, sizeof(struct iocb));
                io_prep_pread(&ct->req->io, fd, ct->req->buf,
                              ct->req->blksize, 0);
                ct->req->state = PATH_PENDING;
                if ((rc = io_submit(ct->aio_grp->ioctx, 1, ios)) != 1) {
                        LOG(3, "io_submit error %i", -rc);
                        return PATH_UNCHECKED;
                }
        }
        ct->running++;

        get_monotonic_time(&endtime);
        endtime.tv_sec += timeout.tv_sec;
        endtime.tv_nsec += timeout.tv_nsec;
        normalize_timespec(&endtime);
        while (1) {
                r = get_events(ct->aio_grp, &timeout);

                if (ct->req->state != PATH_PENDING) {
                        ct->running = 0;
                        return ct->req->state;
                } else if (r == 0 ||
                           (timeout.tv_sec == 0 && timeout.tv_nsec == 0))
                        break;

                get_monotonic_time(&currtime);
                timespecsub(&endtime, &currtime, &timeout);
                if (timeout.tv_sec < 0)
                        timeout.tv_sec = timeout.tv_nsec = 0;
        }
        if (ct->running > timeout_secs || sync) {
                struct io_event event;

                LOG(3, "abort check on timeout");

                io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
                rc = PATH_DOWN;
        } else {
                LOG(4, "async io pending");
                rc = PATH_PENDING;
        }

        return rc;
}

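/*
 * Checker entry point: map the result of check_state() onto the checker
 * msgids defined above. PATH_UNCHECKED and PATH_PENDING use the
 * directio-specific messages; PATH_DOWN and PATH_UP use the generic
 * checker messages.
 */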
int libcheck_check (struct checker * c)
{
        int ret;
        struct directio_context * ct = (struct directio_context *)c->context;

        if (!ct)
                return PATH_UNCHECKED;

        ret = check_state(c->fd, ct, checker_is_sync(c), c->timeout);

        switch (ret) {
        case PATH_UNCHECKED:
                c->msgid = MSG_DIRECTIO_UNKNOWN;
                break;
        case PATH_DOWN:
                c->msgid = CHECKER_MSGID_DOWN;
                break;
        case PATH_UP:
                c->msgid = CHECKER_MSGID_UP;
                break;
        case PATH_PENDING:
                c->msgid = MSG_DIRECTIO_PENDING;
                break;
        default:
                break;
        }
        return ret;
}