2 * QEMU aio implementation
4 * Copyright IBM, Corp. 2008
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
12 * Contributions after 2012-01-13 are licensed under the terms of the
13 * GNU GPL, version 2 or (at your option) any later version.
16 #include "qemu-common.h"
17 #include "block/block.h"
18 #include "qemu/queue.h"
19 #include "qemu/sockets.h"
29 QLIST_ENTRY(AioHandler) node;
/*
 * Search the handler list of ctx for the entry registered on fd.
 *
 * The visible lines iterate ctx->aio_handlers and compare the pfd.fd
 * field of each node against fd. The statements that produce the result
 * are outside this excerpt.
 * NOTE(review): presumably returns the matching node and NULL when no
 * node matches -- confirm against the complete function body.
 */
32 static AioHandler *find_aio_handler(AioContext *ctx, int fd)
36 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
37 if (node->pfd.fd == fd)
/*
 * Register, update, or remove the read/write callbacks for fd on ctx.
 *
 * Any existing handler is looked up with find_aio_handler(). When both
 * io_read and io_write are NULL the call is a removal: the pollfd is
 * detached from ctx->source, and the node is either left for deferred
 * cleanup with its pending revents cleared (while ctx->walking_handlers
 * is non-zero) or unlinked from the list right away. On the other path a
 * handler is allocated with g_malloc0, linked at the head of
 * ctx->aio_handlers and attached to ctx->source if one is not already
 * there (per the in-body comment); then the callbacks, the opaque
 * pointer, pollfds_idx and the event mask are refreshed.
 *
 * NOTE(review): parts of this function (the remaining parameters, the
 * guards around removal and allocation, any release of the node) are
 * outside this excerpt; confirm exact conditions in the complete source.
 */
45 void aio_set_fd_handler(AioContext *ctx,
53 node = find_aio_handler(ctx, fd);
55 /* Are we deleting the fd handler? */
56 if (!io_read && !io_write) {
58 g_source_remove_poll(&ctx->source, &node->pfd);
60 /* If the lock is held, just mark the node as deleted */
61 if (ctx->walking_handlers) {
63 node->pfd.revents = 0;
65 /* Otherwise, delete it for real. We can't just mark it as
66 * deleted because deleted nodes are only cleaned up after
67 * releasing the walking_handlers lock.
69 QLIST_REMOVE(node, node);
75 /* Alloc and insert if it's not already there */
76 node = g_malloc0(sizeof(AioHandler));
78 QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
80 g_source_add_poll(&ctx->source, &node->pfd);
82 /* Update handler with latest information */
83 node->io_read = io_read;
84 node->io_write = io_write;
85 node->opaque = opaque;
86 node->pollfds_idx = -1;
/*
 * A read callback subscribes to input, hangup and error conditions;
 * a write callback subscribes to output and error conditions.
 */
88 node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
89 node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
/*
 * Attach an EventNotifier to ctx by delegating to aio_set_fd_handler().
 *
 * The file descriptor comes from event_notifier_get_fd(notifier), io_read
 * is cast to IOHandler and installed as the read callback, no write
 * callback is installed (NULL), and the notifier itself is passed as the
 * opaque argument. Because aio_set_fd_handler() treats two NULL callbacks
 * as a removal, calling this with io_read == NULL removes the handler.
 */
95 void aio_set_event_notifier(AioContext *ctx,
96 EventNotifier *notifier,
97 EventNotifierHandler *io_read)
99 aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
100 (IOHandler *)io_read, NULL, notifier);
/*
 * Check the handlers of ctx for events that have been reported but not
 * yet dispatched.
 *
 * For each node the reported events are masked with the subscribed ones
 * (pfd.revents & pfd.events). A node matches on the read side when an
 * input, hangup or error condition is set and io_read is installed, and
 * on the write side when an output or error condition is set and
 * io_write is installed.
 * NOTE(review): the bodies of both checks and the return statement are
 * outside this excerpt; presumably the function reports true as soon as
 * one node matches -- confirm against the complete source.
 */
103 bool aio_pending(AioContext *ctx)
107 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
110 revents = node->pfd.revents & node->pfd.events;
111 if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
114 if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
/*
 * Invoke the callbacks of every handler on ctx that has reported events,
 * then run the timers of ctx.
 *
 * The list is walked by hand, starting from QLIST_FIRST and advancing
 * with QLIST_NEXT, because callbacks may call aio_set_fd_handler() during
 * the walk. ctx->walking_handlers is incremented around the callbacks of
 * each node; aio_set_fd_handler() checks that counter to defer unlinking.
 * For each node the reported events are masked with the subscribed ones
 * and pfd.revents is cleared before any callback runs. io_read and
 * io_write are called only when the node is not flagged deleted and a
 * matching condition is set. The successor is fetched before the counter
 * is decremented; a node flagged deleted is unlinked once the counter is
 * back to zero.
 *
 * A read callback whose opaque pointer is the notifier of ctx is treated
 * specially (see the in-body comment: aio_notify() does not count as
 * progress). The result of timerlistgroup_run_timers() is OR-ed into
 * progress.
 * NOTE(review): the loop header, the progress updates and the return
 * statement are outside this excerpt; presumably progress is the return
 * value -- confirm against the complete source.
 */
122 static bool aio_dispatch(AioContext *ctx)
125 bool progress = false;
128 * We have to walk very carefully in case aio_set_fd_handler is
129 * called while we're walking.
131 node = QLIST_FIRST(&ctx->aio_handlers);
136 ctx->walking_handlers++;
138 revents = node->pfd.revents & node->pfd.events;
139 node->pfd.revents = 0;
141 if (!node->deleted &&
142 (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
144 node->io_read(node->opaque);
146 /* aio_notify() does not count as progress */
147 if (node->opaque != &ctx->notifier) {
151 if (!node->deleted &&
152 (revents & (G_IO_OUT | G_IO_ERR)) &&
154 node->io_write(node->opaque);
159 node = QLIST_NEXT(node, node);
161 ctx->walking_handlers--;
163 if (!ctx->walking_handlers && tmp->deleted) {
164 QLIST_REMOVE(tmp, node);
170 progress |= timerlistgroup_run_timers(&ctx->tlg);
/*
 * Run one iteration of the event loop of ctx.
 *
 * Sequence visible in this excerpt:
 *   - remember ctx->dispatching in was_dispatching and set the flag to
 *     !blocking through aio_set_dispatching();
 *   - run queued bottom halves with aio_bh_poll();
 *   - set the flag to !blocking again and call aio_dispatch();
 *   - test for progress on a non-blocking call (the body of that branch
 *     is not visible here);
 *   - rebuild ctx->pollfds from every handler that is not flagged deleted
 *     and has a non-zero event mask, recording the array slot of each
 *     node in pollfds_idx (-1 when the node is not polled);
 *   - wait in qemu_poll_ns(), with the timer deadline of ctx as timeout
 *     when blocking and a zero timeout otherwise;
 *   - copy the reported revents back into the nodes that were polled;
 *   - set the dispatching flag to true and call aio_dispatch() again, so
 *     timers run even when no descriptor was ready;
 *   - restore the dispatching flag to was_dispatching.
 *
 * ctx->walking_handlers is incremented while the pollfds array is being
 * filled and decremented afterwards.
 * NOTE(review): many lines (declarations, progress bookkeeping, the end
 * of the long explanatory comment, the return statement) are outside this
 * excerpt; presumably the function returns whether progress was made --
 * confirm against the complete source.
 */
175 bool aio_poll(AioContext *ctx, bool blocking)
178 bool was_dispatching;
182 was_dispatching = ctx->dispatching;
185 /* aio_notify can avoid the expensive event_notifier_set if
186 * everything (file descriptors, bottom halves, timers) will
187 * be re-evaluated before the next blocking poll(). This happens
190 * 1) when aio_poll is called with blocking == false
192 * 2) when we are called after poll(). If we are called before
193 * poll(), bottom halves will not be re-evaluated and we need
194 * aio_notify() if blocking == true.
196 * The first aio_dispatch() only does something when AioContext is
197 * running as a GSource, and in that case aio_poll is used only
198 * with blocking == false, so this optimization is already quite
199 * effective. However, the code is ugly and should be restructured
200 * to have a single aio_dispatch() call. To do this, we need to
201 * reorganize aio_poll into a prepare/poll/dispatch model like
204 * If we're in a nested event loop, ctx->dispatching might be true.
205 * In that case we can restore it just before returning, but we
206 * have to clear it now.
208 aio_set_dispatching(ctx, !blocking);
211 * If there are callbacks left that have been queued, we need to call them.
212 * Do not call select in this case, because it is possible that the caller
213 * does not need a complete flush (as is the case for aio_poll loops).
215 if (aio_bh_poll(ctx)) {
220 /* Re-evaluate condition (1) above. */
221 aio_set_dispatching(ctx, !blocking);
222 if (aio_dispatch(ctx)) {
226 if (progress && !blocking) {
230 ctx->walking_handlers++;
232 g_array_set_size(ctx->pollfds, 0);
235 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
236 node->pollfds_idx = -1;
237 if (!node->deleted && node->pfd.events) {
240 .events = node->pfd.events,
242 node->pollfds_idx = ctx->pollfds->len;
243 g_array_append_val(ctx->pollfds, pfd);
247 ctx->walking_handlers--;
249 /* wait until next event */
250 ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
252 blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);
254 /* if we have any readable fds, dispatch event */
256 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
257 if (node->pollfds_idx != -1) {
258 GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
260 node->pfd.revents = pfd->revents;
265 /* Run dispatch even if there were no readable fds to run timers */
266 aio_set_dispatching(ctx, true);
267 if (aio_dispatch(ctx)) {
272 aio_set_dispatching(ctx, was_dispatching);