X-Git-Url: http://review.tizen.org/git/?p=sdk%2Femulator%2Fqemu.git;a=blobdiff_plain;f=block-migration.c;h=7def8ab197719e7f16f075a7ee404df8532fface;hp=8218bac09c6cd30286a4ce5b826ac378700036da;hb=76e8f6a91c13cc08a8edf19e9b25d97297fb563d;hpb=df8b863e2f7219eb3356ecc12635811241e38aad diff --git a/block-migration.c b/block-migration.c index 8218bac..7def8ab 100644 --- a/block-migration.c +++ b/block-migration.c @@ -9,6 +9,8 @@ * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. */ #include "qemu-common.h" @@ -16,7 +18,6 @@ #include "hw/hw.h" #include "qemu-queue.h" #include "qemu-timer.h" -#include "monitor.h" #include "block-migration.h" #include "migration.h" #include "blockdev.h" @@ -62,7 +63,6 @@ typedef struct BlkMigBlock { QEMUIOVector qiov; BlockDriverAIOCB *aiocb; int ret; - int64_t time; QSIMPLEQ_ENTRY(BlkMigBlock) entry; } BlkMigBlock; @@ -78,6 +78,7 @@ typedef struct BlkMigState { int prev_progress; int bulk_completed; long double total_time; + long double prev_time_offset; int reads; } BlkMigState; @@ -131,16 +132,10 @@ uint64_t blk_mig_bytes_total(void) return sum << BDRV_SECTOR_BITS; } -static inline void add_avg_read_time(int64_t time) -{ - block_mig_state.reads++; - block_mig_state.total_time += time; -} - static inline long double compute_read_bwidth(void) { assert(block_mig_state.total_time != 0); - return (block_mig_state.reads * BLOCK_SIZE)/ block_mig_state.total_time; + return (block_mig_state.reads / block_mig_state.total_time) * BLOCK_SIZE; } static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector) @@ -186,18 +181,19 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds) BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8; - bmds->aio_bitmap = qemu_mallocz(bitmap_size); + bmds->aio_bitmap = g_malloc0(bitmap_size); } static void blk_mig_read_cb(void *opaque, int ret) { + long double curr_time = qemu_get_clock_ns(rt_clock); BlkMigBlock *blk = opaque; blk->ret = ret; - blk->time = qemu_get_clock_ns(rt_clock) - blk->time; - - add_avg_read_time(blk->time); + block_mig_state.reads++; + block_mig_state.total_time += (curr_time - block_mig_state.prev_time_offset); + block_mig_state.prev_time_offset = curr_time; QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry); bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0); @@ -207,8 +203,7 @@ static void blk_mig_read_cb(void *opaque, int ret) assert(block_mig_state.submitted >= 0); } -static int mig_save_device_bulk(Monitor *mon, QEMUFile *f, - BlkMigDevState *bmds) +static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) { int64_t total_sectors = bmds->total_sectors; int64_t cur_sector = bmds->cur_sector; @@ -240,8 +235,8 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile *f, nr_sectors = total_sectors - cur_sector; } - blk = qemu_malloc(sizeof(BlkMigBlock)); - blk->buf = qemu_malloc(BLOCK_SIZE); + blk = g_malloc(sizeof(BlkMigBlock)); + blk->buf = g_malloc(BLOCK_SIZE); blk->bmds = bmds; blk->sector = cur_sector; blk->nr_sectors = nr_sectors; @@ -250,26 +245,18 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile *f, blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); - blk->time = qemu_get_clock_ns(rt_clock); + if (block_mig_state.submitted == 0) { + block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock); + } blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); - if (!blk->aiocb) { - goto error; - } block_mig_state.submitted++; bdrv_reset_dirty(bs, cur_sector, nr_sectors); bmds->cur_sector = cur_sector + nr_sectors; return (bmds->cur_sector >= total_sectors); - -error: - monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector); - qemu_file_set_error(f); - qemu_free(blk->buf); - qemu_free(blk); - return 0; } static void set_dirty_tracking(int enable) @@ -283,7 +270,6 @@ static void set_dirty_tracking(int enable) static void init_blk_migration_it(void *opaque, BlockDriverState *bs) { - Monitor *mon = opaque; BlkMigDevState *bmds; int64_t sectors; @@ -293,7 +279,7 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs) return; } - bmds = qemu_mallocz(sizeof(BlkMigDevState)); + bmds = g_malloc0(sizeof(BlkMigDevState)); bmds->bs = bs; bmds->bulk_completed = 0; bmds->total_sectors = sectors; @@ -306,19 +292,17 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs) block_mig_state.total_sector_sum += sectors; if (bmds->shared_base) { - monitor_printf(mon, "Start migration for %s with shared base " - "image\n", - bs->device_name); + DPRINTF("Start migration for %s with shared base image\n", + bs->device_name); } else { - monitor_printf(mon, "Start full migration for %s\n", - bs->device_name); + DPRINTF("Start full migration for %s\n", bs->device_name); } QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry); } } -static void init_blk_migration(Monitor *mon, QEMUFile *f) +static void init_blk_migration(QEMUFile *f) { block_mig_state.submitted = 0; block_mig_state.read_done = 0; @@ -329,10 +313,10 @@ static void init_blk_migration(Monitor *mon, QEMUFile *f) block_mig_state.total_time = 0; block_mig_state.reads = 0; - bdrv_iterate(init_blk_migration_it, mon); + bdrv_iterate(init_blk_migration_it, NULL); } -static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f) +static int blk_mig_save_bulked_block(QEMUFile *f) { int64_t completed_sector_sum = 0; BlkMigDevState *bmds; @@ -341,7 +325,7 @@ static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f) QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { if (bmds->bulk_completed == 0) { - if (mig_save_device_bulk(mon, f, bmds) == 1) { + if (mig_save_device_bulk(f, bmds) == 1) { /* completed bulk section for this device */ bmds->bulk_completed = 1; } @@ -363,8 +347,7 @@ static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f) block_mig_state.prev_progress = progress; qemu_put_be64(f, (progress << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS); - monitor_printf(mon, "Completed %d %%\r", progress); - monitor_flush(mon); + DPRINTF("Completed %d %%\r", progress); } return ret; @@ -379,17 +362,18 @@ static void blk_mig_reset_dirty_cursor(void) } } -static int mig_save_device_dirty(Monitor *mon, QEMUFile *f, - BlkMigDevState *bmds, int is_async) +static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, + int is_async) { BlkMigBlock *blk; int64_t total_sectors = bmds->total_sectors; int64_t sector; int nr_sectors; + int ret = -EIO; for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) { if (bmds_aio_inflight(bmds, sector)) { - qemu_aio_flush(); + bdrv_drain_all(); } if (bdrv_get_dirty(bmds->bs, sector)) { @@ -398,8 +382,8 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f, } else { nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK; } - blk = qemu_malloc(sizeof(BlkMigBlock)); - blk->buf = qemu_malloc(BLOCK_SIZE); + blk = g_malloc(sizeof(BlkMigBlock)); + blk->buf = g_malloc(BLOCK_SIZE); blk->bmds = bmds; blk->sector = sector; blk->nr_sectors = nr_sectors; @@ -409,24 +393,23 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f, blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); - blk->time = qemu_get_clock_ns(rt_clock); + if (block_mig_state.submitted == 0) { + block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock); + } blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); - if (!blk->aiocb) { - goto error; - } block_mig_state.submitted++; bmds_set_aio_inflight(bmds, sector, nr_sectors, 1); } else { - if (bdrv_read(bmds->bs, sector, blk->buf, - nr_sectors) < 0) { + ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors); + if (ret < 0) { goto error; } blk_send(f, blk); - qemu_free(blk->buf); - qemu_free(blk); + g_free(blk->buf); + g_free(blk); } bdrv_reset_dirty(bmds->bs, sector, nr_sectors); @@ -439,20 +422,20 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f, return (bmds->cur_dirty >= bmds->total_sectors); error: - monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector); - qemu_file_set_error(f); - qemu_free(blk->buf); - qemu_free(blk); + DPRINTF("Error reading sector %" PRId64 "\n", sector); + qemu_file_set_error(f, ret); + g_free(blk->buf); + g_free(blk); return 0; } -static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async) +static int blk_mig_save_dirty_block(QEMUFile *f, int is_async) { BlkMigDevState *bmds; int ret = 0; QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { - if (mig_save_device_dirty(mon, f, bmds, is_async) == 0) { + if (mig_save_device_dirty(f, bmds, is_async) == 0) { ret = 1; break; } @@ -474,14 +457,14 @@ static void flush_blks(QEMUFile* f) break; } if (blk->ret < 0) { - qemu_file_set_error(f); + qemu_file_set_error(f, blk->ret); break; } blk_send(f, blk); QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry); - qemu_free(blk->buf); - qemu_free(blk); + g_free(blk->buf); + g_free(blk); block_mig_state.read_done--; block_mig_state.transferred++; @@ -521,7 +504,7 @@ static int is_stage2_completed(void) if ((remaining_dirty / bwidth) <= migrate_max_downtime()) { - /* finish stage2 because we think that we can finish remaing work + /* finish stage2 because we think that we can finish remaining work below max_downtime */ return 1; @@ -531,7 +514,7 @@ static int is_stage2_completed(void) return 0; } -static void blk_mig_cleanup(Monitor *mon) +static void blk_mig_cleanup(void) { BlkMigDevState *bmds; BlkMigBlock *blk; @@ -542,99 +525,136 @@ static void blk_mig_cleanup(Monitor *mon) QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry); bdrv_set_in_use(bmds->bs, 0); drive_put_ref(drive_get_by_blockdev(bmds->bs)); - qemu_free(bmds->aio_bitmap); - qemu_free(bmds); + g_free(bmds->aio_bitmap); + g_free(bmds); } while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry); - qemu_free(blk->buf); - qemu_free(blk); + g_free(blk->buf); + g_free(blk); } +} - monitor_printf(mon, "\n"); +static void block_migration_cancel(void *opaque) +{ + blk_mig_cleanup(); } -static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) +static int block_save_setup(QEMUFile *f, void *opaque) { - DPRINTF("Enter save live stage %d submitted %d transferred %d\n", - stage, block_mig_state.submitted, block_mig_state.transferred); + int ret; - if (stage < 0) { - blk_mig_cleanup(mon); - return 0; - } + DPRINTF("Enter save live setup submitted %d transferred %d\n", + block_mig_state.submitted, block_mig_state.transferred); - if (block_mig_state.blk_enable != 1) { - /* no need to migrate storage */ - qemu_put_be64(f, BLK_MIG_FLAG_EOS); - return 1; - } + init_blk_migration(f); - if (stage == 1) { - init_blk_migration(mon, f); + /* start track dirty blocks */ + set_dirty_tracking(1); - /* start track dirty blocks */ - set_dirty_tracking(1); + flush_blks(f); + + ret = qemu_file_get_error(f); + if (ret) { + blk_mig_cleanup(); + return ret; } + blk_mig_reset_dirty_cursor(); + + qemu_put_be64(f, BLK_MIG_FLAG_EOS); + + return 0; +} + +static int block_save_iterate(QEMUFile *f, void *opaque) +{ + int ret; + + DPRINTF("Enter save live iterate submitted %d transferred %d\n", + block_mig_state.submitted, block_mig_state.transferred); + flush_blks(f); - if (qemu_file_has_error(f)) { - blk_mig_cleanup(mon); - return 0; + ret = qemu_file_get_error(f); + if (ret) { + blk_mig_cleanup(); + return ret; } blk_mig_reset_dirty_cursor(); - if (stage == 2) { - /* control the rate of transfer */ - while ((block_mig_state.submitted + - block_mig_state.read_done) * BLOCK_SIZE < - qemu_file_get_rate_limit(f)) { - if (block_mig_state.bulk_completed == 0) { - /* first finish the bulk phase */ - if (blk_mig_save_bulked_block(mon, f) == 0) { - /* finished saving bulk on all devices */ - block_mig_state.bulk_completed = 1; - } - } else { - if (blk_mig_save_dirty_block(mon, f, 1) == 0) { - /* no more dirty blocks */ - break; - } + /* control the rate of transfer */ + while ((block_mig_state.submitted + + block_mig_state.read_done) * BLOCK_SIZE < + qemu_file_get_rate_limit(f)) { + if (block_mig_state.bulk_completed == 0) { + /* first finish the bulk phase */ + if (blk_mig_save_bulked_block(f) == 0) { + /* finished saving bulk on all devices */ + block_mig_state.bulk_completed = 1; + } + } else { + if (blk_mig_save_dirty_block(f, 1) == 0) { + /* no more dirty blocks */ + break; } } + } - flush_blks(f); + flush_blks(f); - if (qemu_file_has_error(f)) { - blk_mig_cleanup(mon); - return 0; - } + ret = qemu_file_get_error(f); + if (ret) { + blk_mig_cleanup(); + return ret; } - if (stage == 3) { - /* we know for sure that save bulk is completed and - all async read completed */ - assert(block_mig_state.submitted == 0); + qemu_put_be64(f, BLK_MIG_FLAG_EOS); + + return is_stage2_completed(); +} - while (blk_mig_save_dirty_block(mon, f, 0) != 0); - blk_mig_cleanup(mon); +static int block_save_complete(QEMUFile *f, void *opaque) +{ + int ret; - /* report completion */ - qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS); + DPRINTF("Enter save live complete submitted %d transferred %d\n", + block_mig_state.submitted, block_mig_state.transferred); - if (qemu_file_has_error(f)) { - return 0; - } + flush_blks(f); + + ret = qemu_file_get_error(f); + if (ret) { + blk_mig_cleanup(); + return ret; + } + + blk_mig_reset_dirty_cursor(); + + /* we know for sure that save bulk is completed and + all async read completed */ + assert(block_mig_state.submitted == 0); + + while (blk_mig_save_dirty_block(f, 0) != 0) { + /* Do nothing */ + } + blk_mig_cleanup(); - monitor_printf(mon, "Block migration completed\n"); + /* report completion */ + qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS); + + ret = qemu_file_get_error(f); + if (ret) { + return ret; } + DPRINTF("Block migration completed\n"); + qemu_put_be64(f, BLK_MIG_FLAG_EOS); - return ((stage == 2) && is_stage2_completed()); + return 0; } static int block_load(QEMUFile *f, void *opaque, int version_id) @@ -647,6 +667,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) uint8_t *buf; int64_t total_sectors = 0; int nr_sectors; + int ret; do { addr = qemu_get_be64(f); @@ -655,7 +676,6 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) addr >>= BDRV_SECTOR_BITS; if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) { - int ret; /* get device name */ len = qemu_get_byte(f); qemu_get_buffer(f, (uint8_t *)device_name, len); @@ -672,7 +692,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) bs_prev = bs; total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; if (total_sectors <= 0) { - error_report("Error getting length of block device %s\n", + error_report("Error getting length of block device %s", device_name); return -EINVAL; } @@ -684,12 +704,12 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK; } - buf = qemu_malloc(BLOCK_SIZE); + buf = g_malloc(BLOCK_SIZE); qemu_get_buffer(f, buf, BLOCK_SIZE); ret = bdrv_write(bs, addr, buf, nr_sectors); - qemu_free(buf); + g_free(buf); if (ret < 0) { return ret; } @@ -705,28 +725,44 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) fprintf(stderr, "Unknown flags\n"); return -EINVAL; } - if (qemu_file_has_error(f)) { - return -EIO; + ret = qemu_file_get_error(f); + if (ret != 0) { + return ret; } } while (!(flags & BLK_MIG_FLAG_EOS)); return 0; } -static void block_set_params(int blk_enable, int shared_base, void *opaque) +static void block_set_params(const MigrationParams *params, void *opaque) { - block_mig_state.blk_enable = blk_enable; - block_mig_state.shared_base = shared_base; + block_mig_state.blk_enable = params->blk; + block_mig_state.shared_base = params->shared; /* shared base means that blk_enable = 1 */ - block_mig_state.blk_enable |= shared_base; + block_mig_state.blk_enable |= params->shared; } +static bool block_is_active(void *opaque) +{ + return block_mig_state.blk_enable == 1; +} + +SaveVMHandlers savevm_block_handlers = { + .set_params = block_set_params, + .save_live_setup = block_save_setup, + .save_live_iterate = block_save_iterate, + .save_live_complete = block_save_complete, + .load_state = block_load, + .cancel = block_migration_cancel, + .is_active = block_is_active, +}; + void blk_mig_init(void) { QSIMPLEQ_INIT(&block_mig_state.bmds_list); QSIMPLEQ_INIT(&block_mig_state.blk_list); - register_savevm_live(NULL, "block", 0, 1, block_set_params, - block_save_live, NULL, block_load, &block_mig_state); + register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers, + &block_mig_state); }