aio-posix.c

   1 /*
   2  * QEMU aio implementation
   3  *
   4  * Copyright IBM, Corp. 2008
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  * Contributions after 2012-01-13 are licensed under the terms of the
  13  * GNU GPL, version 2 or (at your option) any later version.
  14  */
  15
  16 #include "qemu-common.h"
  17 #include "block/block.h"
  18 #include "qemu/queue.h"
  19 #include "qemu/sockets.h"
  20
  21 struct AioHandler
  22 {
  23     GPollFD pfd;
  24     IOHandler *io_read;
  25     IOHandler *io_write;
  26     int deleted;
  27     int pollfds_idx;
  28     void *opaque;
  29     QLIST_ENTRY(AioHandler) node;
  30 };
  31
  32 static AioHandler *find_aio_handler(AioContext *ctx, int fd)
  33 {
  34     AioHandler *node;
  35
  36     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
  37         if (node->pfd.fd == fd)
  38             if (!node->deleted)
  39                 return node;
  40     }
  41
  42     return NULL;
  43 }
  44
  45 void aio_set_fd_handler(AioContext *ctx,
  46                         int fd,
  47                         IOHandler *io_read,
  48                         IOHandler *io_write,
  49                         void *opaque)
  50 {
  51     AioHandler *node;
  52
  53     node = find_aio_handler(ctx, fd);
  54
  55     /* Are we deleting the fd handler? */
  56     if (!io_read && !io_write) {
  57         if (node) {
  58             g_source_remove_poll(&ctx->source, &node->pfd);
  59
  60             /* If the lock is held, just mark the node as deleted */
  61             if (ctx->walking_handlers) {
  62                 node->deleted = 1;
  63                 node->pfd.revents = 0;
  64             } else {
  65                 /* Otherwise, delete it for real.  We can't just mark it as
  66                  * deleted because deleted nodes are only cleaned up after
  67                  * releasing the walking_handlers lock.
  68                  */
  69                 QLIST_REMOVE(node, node);
  70                 g_free(node);
  71             }
  72         }
  73     } else {
  74         if (node == NULL) {
  75             /* Alloc and insert if it's not already there */
  76             node = g_malloc0(sizeof(AioHandler));
  77             node->pfd.fd = fd;
  78             QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
  79
  80             g_source_add_poll(&ctx->source, &node->pfd);
  81         }
  82         /* Update handler with latest information */
  83         node->io_read = io_read;
  84         node->io_write = io_write;
  85         node->opaque = opaque;
  86         node->pollfds_idx = -1;
  87
  88         node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
  89         node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
  90     }
  91
  92     aio_notify(ctx);
  93 }
  94
  95 void aio_set_event_notifier(AioContext *ctx,
  96                             EventNotifier *notifier,
  97                             EventNotifierHandler *io_read)
  98 {
  99     aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
 100                        (IOHandler *)io_read, NULL, notifier);
 101 }
 102
 103 bool aio_pending(AioContext *ctx)
 104 {
 105     AioHandler *node;
 106
 107     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
 108         int revents;
 109
 110         revents = node->pfd.revents & node->pfd.events;
 111         if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
 112             return true;
 113         }
 114         if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
 115             return true;
 116         }
 117     }
 118
 119     return false;
 120 }
 121
 122 static bool aio_dispatch(AioContext *ctx)
 123 {
 124     AioHandler *node;
 125     bool progress = false;
 126
 127     /*
 128      * We have to walk very carefully in case aio_set_fd_handler is
 129      * called while we're walking.
 130      */
 131     node = QLIST_FIRST(&ctx->aio_handlers);
 132     while (node) {
 133         AioHandler *tmp;
 134         int revents;
 135
 136         ctx->walking_handlers++;
 137
 138         revents = node->pfd.revents & node->pfd.events;
 139         node->pfd.revents = 0;
 140
 141         if (!node->deleted &&
 142             (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
 143             node->io_read) {
 144             node->io_read(node->opaque);
 145
 146             /* aio_notify() does not count as progress */
 147             if (node->opaque != &ctx->notifier) {
 148                 progress = true;
 149             }
 150         }
 151         if (!node->deleted &&
 152             (revents & (G_IO_OUT | G_IO_ERR)) &&
 153             node->io_write) {
 154             node->io_write(node->opaque);
 155             progress = true;
 156         }
 157
 158         tmp = node;
 159         node = QLIST_NEXT(node, node);
 160
 161         ctx->walking_handlers--;
 162
 163         if (!ctx->walking_handlers && tmp->deleted) {
 164             QLIST_REMOVE(tmp, node);
 165             g_free(tmp);
 166         }
 167     }
 168
 169     /* Run our timers */
 170     progress |= timerlistgroup_run_timers(&ctx->tlg);
 171
 172     return progress;
 173 }
 174
 175 bool aio_poll(AioContext *ctx, bool blocking)
 176 {
 177     AioHandler *node;
 178     bool was_dispatching;
 179     int ret;
 180     bool progress;
 181
 182     was_dispatching = ctx->dispatching;
 183     progress = false;
 184
 185     /* aio_notify can avoid the expensive event_notifier_set if
 186      * everything (file descriptors, bottom halves, timers) will
 187      * be re-evaluated before the next blocking poll().  This happens
 188      * in two cases:
 189      *
 190      * 1) when aio_poll is called with blocking == false
 191      *
 192      * 2) when we are called after poll().  If we are called before
 193      *    poll(), bottom halves will not be re-evaluated and we need
 194      *    aio_notify() if blocking == true.
 195      *
 196      * The first aio_dispatch() only does something when AioContext is
 197      * running as a GSource, and in that case aio_poll is used only
 198      * with blocking == false, so this optimization is already quite
 199      * effective.  However, the code is ugly and should be restructured
 200      * to have a single aio_dispatch() call.  To do this, we need to
 201      * reorganize aio_poll into a prepare/poll/dispatch model like
 202      * glib's.
 203      *
 204      * If we're in a nested event loop, ctx->dispatching might be true.
 205      * In that case we can restore it just before returning, but we
 206      * have to clear it now.
 207      */
 208     aio_set_dispatching(ctx, !blocking);
 209
 210     /*
 211      * If there are callbacks left that have been queued, we need to call them.
 212      * Do not call select in this case, because it is possible that the caller
 213      * does not need a complete flush (as is the case for aio_poll loops).
 214      */
 215     if (aio_bh_poll(ctx)) {
 216         blocking = false;
 217         progress = true;
 218     }
 219
 220     /* Re-evaluate condition (1) above.  */
 221     aio_set_dispatching(ctx, !blocking);
 222     if (aio_dispatch(ctx)) {
 223         progress = true;
 224     }
 225
 226     if (progress && !blocking) {
 227         goto out;
 228     }
 229
 230     ctx->walking_handlers++;
 231
 232     g_array_set_size(ctx->pollfds, 0);
 233
 234     /* fill pollfds */
 235     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
 236         node->pollfds_idx = -1;
 237         if (!node->deleted && node->pfd.events) {
 238             GPollFD pfd = {
 239                 .fd = node->pfd.fd,
 240                 .events = node->pfd.events,
 241             };
 242             node->pollfds_idx = ctx->pollfds->len;
 243             g_array_append_val(ctx->pollfds, pfd);
 244         }
 245     }
 246
 247     ctx->walking_handlers--;
 248
 249     /* wait until next event */
 250     ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
 251                          ctx->pollfds->len,
 252                          blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);
 253
 254     /* if we have any readable fds, dispatch event */
 255     if (ret > 0) {
 256         QLIST_FOREACH(node, &ctx->aio_handlers, node) {
 257             if (node->pollfds_idx != -1) {
 258                 GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
 259                                               node->pollfds_idx);
 260                 node->pfd.revents = pfd->revents;
 261             }
 262         }
 263     }
 264
 265     /* Run dispatch even if there were no readable fds to run timers */
 266     aio_set_dispatching(ctx, true);
 267     if (aio_dispatch(ctx)) {
 268         progress = true;
 269     }
 270
 271 out:
 272     aio_set_dispatching(ctx, was_dispatching);
 273     return progress;
 274 }