2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "block/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
44 #include <sys/types.h>
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
58 * A BdrvDirtyBitmap can be in three possible states:
59 * (1) successor is NULL and disabled is false: full r/w mode
60 * (2) successor is NULL and disabled is true: read only mode ("disabled")
61 * (3) successor is set: frozen mode.
62 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
63 * or enabled. A frozen bitmap can only abdicate() or reclaim().
/* One dirty bitmap; see the state description above for how successor and
 * disabled combine. */
65 struct BdrvDirtyBitmap {
66 HBitmap *bitmap; /* Dirty sector bitmap implementation */
67 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
68 char *name; /* Optional non-empty unique ID */
69 int64_t size; /* Size of the bitmap (Number of sectors) */
70 bool disabled; /* Bitmap is read-only */
71 QLIST_ENTRY(BdrvDirtyBitmap) list; /* QLIST linkage between bitmaps */
/* Sentinel kept in a result field until the coroutine stores a real return
 * value; bdrv_create() polls while its result still equals NOT_DONE. */
74 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
/* BDSes appended by bdrv_new_root(), linked through their device_list entry */
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
/* BDSes appended by bdrv_assign_node_name(), linked through node_list */
79 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
80 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
/* Registered block drivers; bdrv_register() inserts at the head */
82 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
83 QLIST_HEAD_INITIALIZER(bdrv_drivers);
/* Forward declaration; the definition appears further down in this file */
85 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
86 const char *reference, QDict *options, int flags,
87 BlockDriverState *parent,
88 const BdrvChildRole *child_role,
89 BlockDriver *drv, Error **errp);
/* Forward declaration */
91 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
92 /* If non-zero, use only whitelisted block drivers */
93 static int use_bdrv_whitelist;
/*
 * Helper for Windows drive-name detection. The visible part of the test
 * requires filename[0] to be an ASCII letter (either case); that result is
 * AND-ed with a further condition.
 */
96 static int is_windows_drive_prefix(const char *filename)
98 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
99 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
/*
 * Recognise Windows drive names. Two forms are tested: names for which
 * is_windows_drive_prefix() succeeds (together with a further condition),
 * and names starting with the literal prefix \\.\ or //./ .
 */
103 int is_windows_drive(const char *filename)
105 if (is_windows_drive_prefix(filename) &&
108 if (strstart(filename, "\\\\.\\", NULL) ||
109 strstart(filename, "//./", NULL))
115 size_t bdrv_opt_mem_align(BlockDriverState *bs)
117 if (!bs || !bs->drv) {
118 /* page size or 4k (hdd sector size) should be on the safe side */
119 return MAX(4096, getpagesize());
122 return bs->bl.opt_mem_alignment;
125 size_t bdrv_min_mem_align(BlockDriverState *bs)
127 if (!bs || !bs->drv) {
128 /* page size or 4k (hdd sector size) should be on the safe side */
129 return MAX(4096, getpagesize());
132 return bs->bl.min_mem_alignment;
135 /* check if the path starts with "<protocol>:" */
136 int path_has_protocol(const char *path)
/* Windows drive names are checked for first.
 * NOTE(review): presumably because their ':' must not be mistaken for a
 * protocol separator - confirm. */
141 if (is_windows_drive(path) ||
142 is_windows_drive_prefix(path)) {
/* Variant 1: advance p to the first ':', '/' or backslash (or to the NUL) */
145 p = path + strcspn(path, ":/\\");
/* Variant 2: same scan, but a backslash does not stop it */
147 p = path + strcspn(path, ":/");
/*
 * Report whether @path is absolute. Two return expressions are present:
 * one accepts a leading '/' or backslash, the other only a leading '/'.
 */
153 int path_is_absolute(const char *path)
156 /* specific case for names like: "\\.\d:" */
157 if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
160 return (*path == '/' || *path == '\\');
162 return (*path == '/');
166 /* if filename is absolute, just copy it to dest. Otherwise, build a
167 path to it by considering it is relative to base_path. URL are
169 void path_combine(char *dest, int dest_size,
170 const char *base_path,
171 const char *filename)
/* An absolute @filename is copied as is, bounded by dest_size */
178 if (path_is_absolute(filename)) {
179 pstrcpy(dest, dest_size, filename);
/* Locate the first ':' in base_path */
181 p = strchr(base_path, ':');
/* Locate the last '/' and the last backslash in base_path */
186 p1 = strrchr(base_path, '/');
190 p2 = strrchr(base_path, '\\');
/* The prefix length is checked against the room left for the terminator */
202 if (len > dest_size - 1)
/* Copy the first len bytes of base_path, then append @filename */
204 memcpy(dest, base_path, len);
206 pstrcat(dest, dest_size, filename);
/*
 * Write the full backing file name into @dest (buffer size @sz).
 *
 * An empty, protocol-prefixed or absolute backing name is copied unchanged.
 * Otherwise an empty or "json:" @backed name is reported as an error, and
 * any other @backed name is combined with the backing name by
 * path_combine().
 */
210 void bdrv_get_full_backing_filename_from_filename(const char *backed,
212 char *dest, size_t sz,
215 if (backing[0] == '\0' || path_has_protocol(backing) ||
216 path_is_absolute(backing))
218 pstrcpy(dest, sz, backing);
219 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
220 error_setg(errp, "Cannot use relative backing file names for '%s'",
223 path_combine(dest, sz, backed, backing);
/*
 * Convenience wrapper around bdrv_get_full_backing_filename_from_filename()
 * for a BDS: the backed name is bs->exact_filename when that is non-empty,
 * otherwise bs->filename; the backing name is bs->backing_file.
 */
227 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
230 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
232 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
/* Register @bdrv: pass it to bdrv_setup_io_funcs(), then insert it at the
 * head of the bdrv_drivers list. */
236 void bdrv_register(BlockDriver *bdrv)
238 bdrv_setup_io_funcs(bdrv);
240 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
/* Create a BDS with bdrv_new() and append it to the bdrv_states list. */
243 BlockDriverState *bdrv_new_root(void)
245 BlockDriverState *bs = bdrv_new();
247 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
/*
 * Allocate a zero-filled BlockDriverState and initialise its dirty-bitmap
 * list, per-operation blocker lists, notifier lists, the two
 * throttled-request queues and its AioContext.
 */
251 BlockDriverState *bdrv_new(void)
253 BlockDriverState *bs;
256 bs = g_new0(BlockDriverState, 1);
257 QLIST_INIT(&bs->dirty_bitmaps);
/* one blocker list per block operation type */
258 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
259 QLIST_INIT(&bs->op_blockers[i]);
261 bdrv_iostatus_disable(bs);
262 notifier_list_init(&bs->close_notifiers);
263 notifier_with_return_list_init(&bs->before_write_notifiers);
264 qemu_co_queue_init(&bs->throttled_reqs[0]);
265 qemu_co_queue_init(&bs->throttled_reqs[1]);
/* start out in the context returned by qemu_get_aio_context() */
267 bs->aio_context = qemu_get_aio_context();
/* Add @notify to the close_notifiers list of @bs. */
272 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
274 notifier_list_add(&bs->close_notifiers, notify);
/* Search bdrv_drivers for a driver whose format_name equals @format_name
 * (exact strcmp() match). */
277 BlockDriver *bdrv_find_format(const char *format_name)
280 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
281 if (!strcmp(drv1->format_name, format_name)) {
/*
 * Check the format name of @drv against the two name tables expanded from
 * CONFIG_BDRV_RW_WHITELIST and CONFIG_BDRV_RO_WHITELIST.
 */
288 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
/* Both tables are scanned until a NULL entry is reached */
290 static const char *whitelist_rw[] = {
291 CONFIG_BDRV_RW_WHITELIST
293 static const char *whitelist_ro[] = {
294 CONFIG_BDRV_RO_WHITELIST
298 if (!whitelist_rw[0] && !whitelist_ro[0]) {
299 return 1; /* no whitelist, anything goes */
/* exact name match against the read-write table */
302 for (p = whitelist_rw; *p; p++) {
303 if (!strcmp(drv->format_name, *p)) {
/* exact name match against the read-only table */
308 for (p = whitelist_ro; *p; p++) {
309 if (!strcmp(drv->format_name, *p)) {
/* Like bdrv_find_format(), but the result is NULL unless the driver found
 * also passes bdrv_is_whitelisted() for the requested read_only mode. */
317 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
320 BlockDriver *drv = bdrv_find_format(format_name);
321 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
/* Argument/result bundle handed to bdrv_create_co_entry(); the code in this
 * file accesses its drv, filename, opts, ret and err members. */
324 typedef struct CreateCo {
/*
 * Coroutine entry point for image creation. @opaque is a CreateCo; the
 * driver's bdrv_create callback is invoked with its filename and opts, and
 * any error is handed over to cco->err.
 */
332 static void coroutine_fn bdrv_create_co_entry(void *opaque)
334 Error *local_err = NULL;
337 CreateCo *cco = opaque;
340 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
342 error_propagate(&cco->err, local_err);
/*
 * Create an image file through the bdrv_create callback of @drv.
 *
 * The callback runs via bdrv_create_co_entry(): directly when the caller
 * is already in coroutine context, otherwise in a new coroutine whose
 * result is awaited by polling. A driver without a bdrv_create callback is
 * reported through @errp.
 */
347 int bdrv_create(BlockDriver *drv, const char* filename,
348 QemuOpts *opts, Error **errp)
/* private copy of the name; released with g_free() at the end */
355 .filename = g_strdup(filename),
361 if (!drv->bdrv_create) {
362 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
367 if (qemu_in_coroutine()) {
368 /* Fast-path if already in coroutine context */
369 bdrv_create_co_entry(&cco);
/* Otherwise start a coroutine and poll the context returned by
 * qemu_get_aio_context() until a result has replaced NOT_DONE */
371 co = qemu_coroutine_create(bdrv_create_co_entry);
372 qemu_coroutine_enter(co, &cco);
373 while (cco.ret == NOT_DONE) {
374 aio_poll(qemu_get_aio_context(), true);
/* Two reporting paths: forward the error object collected by the
 * coroutine, or build a generic message from the errno-style value */
381 error_propagate(errp, cco.err);
383 error_setg_errno(errp, -ret, "Could not create image");
388 g_free(cco.filename);
/*
 * Create @filename using the protocol driver that bdrv_find_protocol()
 * selects for it (protocol prefixes are allowed), forwarding any error
 * from bdrv_create() to @errp.
 */
392 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
395 Error *local_err = NULL;
398 drv = bdrv_find_protocol(filename, true, errp);
403 ret = bdrv_create(drv, filename, opts, &local_err);
405 error_propagate(errp, local_err);
411 * Try to get @bs's logical and physical block size.
412 * On success, store them in @bsz struct and return 0.
413 * On failure return -errno.
414 * @bs must not be empty.
416 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
418 BlockDriver *drv = bs->drv;
/* Delegate to the driver when it implements bdrv_probe_blocksizes */
420 if (drv && drv->bdrv_probe_blocksizes) {
421 return drv->bdrv_probe_blocksizes(bs, bsz);
428 * Try to get @bs's geometry (cyls, heads, sectors).
429 * On success, store them in @geo struct and return 0.
430 * On failure return -errno.
431 * @bs must not be empty.
433 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
435 BlockDriver *drv = bs->drv;
/* Delegate to the driver when it implements bdrv_probe_geometry */
437 if (drv && drv->bdrv_probe_geometry) {
438 return drv->bdrv_probe_geometry(bs, geo);
445 * Create a uniquely-named empty temporary file.
446 * Return 0 upon success, otherwise a negative errno value.
/* @filename is the output buffer, @size its capacity in bytes. Two
 * implementations are present: one built on GetTempPath()/GetTempFileName(),
 * one built on TMPDIR and mkstemp(). */
448 int get_tmp_filename(char *filename, int size)
451 char temp_dir[MAX_PATH];
452 /* GetTempFileName requires that its output buffer (4th param)
453 have length MAX_PATH or greater. */
454 assert(size >= MAX_PATH);
/* 0 when both calls succeed, otherwise the negated GetLastError() value */
455 return (GetTempPath(MAX_PATH, temp_dir)
456 && GetTempFileName(temp_dir, "qem", 0, filename)
457 ? 0 : -GetLastError());
/* mkstemp() variant: directory taken from the TMPDIR environment variable */
461 tmpdir = getenv("TMPDIR");
/* snprintf() returning >= size means the template did not fit */
465 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
/* mkstemp() creates the file and fills in the XXXXXX part of the name */
468 fd = mkstemp(filename);
/* only the name is wanted, so the descriptor is closed again */
472 if (close(fd) != 0) {
481 * Detect host devices. By convention, /dev/cdrom[N] is always
482 * recognized as a host CDROM.
/* Ask every registered driver that has a bdrv_probe_device callback to
 * score @filename; score_max tracks the best score seen so far. */
484 static BlockDriver *find_hdev_driver(const char *filename)
486 int score_max = 0, score;
487 BlockDriver *drv = NULL, *d;
489 QLIST_FOREACH(d, &bdrv_drivers, list) {
490 if (d->bdrv_probe_device) {
491 score = d->bdrv_probe_device(filename);
/* strictly greater: on a tie the driver found earlier is kept */
492 if (score > score_max) {
/*
 * Pick the protocol driver for @filename. Host device detection
 * (find_hdev_driver()) runs first; after that the text before the first
 * ':' is compared with each driver's protocol_name. An unmatched protocol
 * is reported through errp.
 */
502 BlockDriver *bdrv_find_protocol(const char *filename,
503 bool allow_protocol_prefix,
511 /* TODO Drivers without bdrv_file_open must be specified explicitly */
514 * XXX(hch): we really should not let host device detection
515 * override an explicit protocol specification, but moving this
516 * later breaks access to device names with colons in them.
517 * Thanks to the brain-dead persistent naming schemes on udev-
518 * based Linux systems those actually are quite common.
520 drv1 = find_hdev_driver(filename);
/* Names without a protocol prefix, or callers that do not allow one,
 * take a separate path here */
525 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
/* Copy the text before the ':' into protocol[], truncating it to the
 * buffer size */
529 p = strchr(filename, ':');
532 if (len > sizeof(protocol) - 1)
533 len = sizeof(protocol) - 1;
534 memcpy(protocol, filename, len);
535 protocol[len] = '\0';
/* Exact match on protocol_name; drivers without one are ignored */
536 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
537 if (drv1->protocol_name &&
538 !strcmp(drv1->protocol_name, protocol)) {
543 error_setg(errp, "Unknown protocol '%s'", protocol);
548 * Guess image format by probing its contents.
549 * This is not a good idea when your image is raw (CVE-2008-2004), but
550 * we do it anyway for backward compatibility.
552 * @buf contains the image's first @buf_size bytes.
553 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
554 * but can be smaller if the image file is smaller)
555 * @filename is its filename.
557 * For all block drivers, call the bdrv_probe() method to get its
559 * Return the first block driver with the highest probing score.
561 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
562 const char *filename)
564 int score_max = 0, score;
565 BlockDriver *drv = NULL, *d;
567 QLIST_FOREACH(d, &bdrv_drivers, list) {
569 score = d->bdrv_probe(buf, buf_size, filename);
/* strictly greater, so among equal scores the first driver wins */
570 if (score > score_max) {
/*
 * Determine the format driver for @bs by reading up to
 * BLOCK_PROBE_BUF_SIZE bytes from offset 0 and passing them to
 * bdrv_probe_all(). Read failures and undetectable formats are reported
 * through @errp.
 */
580 static int find_image_format(BlockDriverState *bs, const char *filename,
581 BlockDriver **pdrv, Error **errp)
584 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
587 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
588 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
593 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
595 error_setg_errno(errp, -ret, "Could not read image for determining its "
/* the bdrv_pread() result doubles as the buffer size given to the probe */
601 drv = bdrv_probe_all(buf, ret, filename);
603 error_setg(errp, "Could not determine image format: No compatible "
612 * Set the current 'total_sectors' value
613 * Return 0 on success, -errno on error.
/* @hint is the sector count used when the driver cannot be queried. */
615 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
617 BlockDriver *drv = bs->drv;
619 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
623 /* query actual device if possible, otherwise just trust the hint */
624 if (drv->bdrv_getlength) {
625 int64_t length = drv->bdrv_getlength(bs);
/* convert the length to sectors, rounding up */
629 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
632 bs->total_sectors = hint;
637 * Set open flags for a given discard mode
639 * Return 0 on success, -1 if the discard mode was invalid.
/* Accepted spellings: "off"/"ignore" and "on"/"unmap". */
641 int bdrv_parse_discard_flags(const char *mode, int *flags)
/* start from a clean state: BDRV_O_UNMAP is cleared first */
643 *flags &= ~BDRV_O_UNMAP;
645 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
647 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
648 *flags |= BDRV_O_UNMAP;
657 * Set open flags for a given cache mode
659 * Return 0 on success, -1 if the cache mode was invalid.
/*
 * Mode names mapped to flags by the code below:
 *   "off" / "none"  -> BDRV_O_NOCACHE | BDRV_O_CACHE_WB
 *   "directsync"    -> BDRV_O_NOCACHE
 *   "writeback"     -> BDRV_O_CACHE_WB
 *   "unsafe"        -> BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH
 *   "writethrough"  -> no flag added
 */
661 int bdrv_parse_cache_flags(const char *mode, int *flags)
/* all cache-related bits are cleared before the new mode is applied */
663 *flags &= ~BDRV_O_CACHE_MASK;
665 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
666 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
667 } else if (!strcmp(mode, "directsync")) {
668 *flags |= BDRV_O_NOCACHE;
669 } else if (!strcmp(mode, "writeback")) {
670 *flags |= BDRV_O_CACHE_WB;
671 } else if (!strcmp(mode, "unsafe")) {
672 *flags |= BDRV_O_CACHE_WB;
673 *flags |= BDRV_O_NO_FLUSH;
674 } else if (!strcmp(mode, "writethrough")) {
675 /* this is the default */
684 * Returns the flags that a temporary snapshot should get, based on the
685 * originally requested flags (the originally requested image will have flags
686 * like a backing file)
688 static int bdrv_temp_snapshot_flags(int flags)
690 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
694 * Returns the flags that bs->file should get if a protocol driver is expected,
695 * based on the given flags for the parent BDS
/* @flags: open flags of the parent BDS. */
697 static int bdrv_inherited_flags(int flags)
699 /* Enable protocol handling, disable format probing for bs->file */
700 flags |= BDRV_O_PROTOCOL;
702 /* Our block drivers take care to send flushes and respect unmap policy,
703 * so we can enable both unconditionally on lower layers. */
704 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
706 /* Clear flags that only apply to the top layer */
707 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
/* Child role whose inherit_flags callback is bdrv_inherited_flags();
 * bdrv_open_inherit() passes it when opening the "file" child. */
712 const BdrvChildRole child_file = {
713 .inherit_flags = bdrv_inherited_flags,
717 * Returns the flags that bs->file should get if the use of formats (and not
718 * only protocols) is permitted for it, based on the given flags for the parent
721 static int bdrv_inherited_fmt_flags(int parent_flags)
723 int flags = child_file.inherit_flags(parent_flags);
724 return flags & ~BDRV_O_PROTOCOL;
/* Child role whose inherit_flags callback is bdrv_inherited_fmt_flags(). */
727 const BdrvChildRole child_format = {
728 .inherit_flags = bdrv_inherited_fmt_flags,
732 * Returns the flags that bs->backing_hd should get, based on the given flags
/* Clears BDRV_O_RDWR, BDRV_O_COPY_ON_READ, BDRV_O_SNAPSHOT and
 * BDRV_O_TEMPORARY from @flags. */
735 static int bdrv_backing_flags(int flags)
737 /* backing files always opened read-only */
738 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
740 /* snapshot=on is handled on the top layer */
741 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
/* Child role whose inherit_flags callback is bdrv_backing_flags();
 * bdrv_open_backing_file() uses it to open and attach the backing file. */
746 static const BdrvChildRole child_backing = {
747 .inherit_flags = bdrv_backing_flags,
/* Derive open_flags from @flags: BDRV_O_CACHE_WB is always added, the
 * block-layer-internal flags are removed, and BDRV_O_TEMPORARY forces
 * BDRV_O_RDWR. */
750 static int bdrv_open_flags(BlockDriverState *bs, int flags)
752 int open_flags = flags | BDRV_O_CACHE_WB;
755 * Clear flags that are internal to the block layer before opening the
758 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
761 * Snapshots should be writable.
763 if (flags & BDRV_O_TEMPORARY) {
764 open_flags |= BDRV_O_RDWR;
/*
 * Validate @node_name and give it to @bs. The name must be well-formed,
 * must not equal a block backend name and must not already be used by
 * another node; each violation is reported through errp. On success the
 * name is stored in bs->node_name and @bs is appended to graph_bdrv_states.
 */
770 static void bdrv_assign_node_name(BlockDriverState *bs,
771 const char *node_name,
778 /* Check for empty string or invalid characters */
779 if (!id_wellformed(node_name)) {
780 error_setg(errp, "Invalid node name");
784 /* avoid namespace collisions with block backend names */
785 if (blk_by_name(node_name)) {
786 error_setg(errp, "node-name=%s is conflicting with a device id",
791 /* avoid duplicate node names */
792 if (bdrv_find_node(node_name)) {
793 error_setg(errp, "Duplicate node name");
797 /* copy node name into the bs and insert it into the graph list */
798 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
799 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
/* Option list named "bdrv_common"; bdrv_open_common() uses it to absorb
 * the "node-name" option from the options QDict. */
802 static QemuOptsList bdrv_runtime_opts = {
803 .name = "bdrv_common",
804 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
808 .type = QEMU_OPT_STRING,
809 .help = "Node name of the block device node",
811 { /* end of list */ }
816 * Common part for opening disk images and files
818 * Removes all processed options from *options.
/*
 * Visible steps, in order: choose the file name, absorb the common runtime
 * options (node-name), derive the open flags and the read-only state,
 * enforce the whitelist and copy-on-read rules, record the file name,
 * allocate the driver state, call the driver's open callback, then refresh
 * the sector count and the I/O limits.
 */
820 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
821 QDict *options, int flags, BlockDriver *drv, Error **errp)
824 const char *filename;
825 const char *node_name = NULL;
827 Error *local_err = NULL;
830 assert(bs->file == NULL);
831 assert(options != NULL && bs->options != options);
/* The file name comes either from @file or from the "filename" option */
834 filename = file->filename;
836 filename = qdict_get_try_str(options, "filename");
839 if (drv->bdrv_needs_filename && !filename) {
840 error_setg(errp, "The '%s' block driver requires a file name",
845 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
/* Absorb the options described by bdrv_runtime_opts from @options */
847 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
848 qemu_opts_absorb_qdict(opts, options, &local_err);
850 error_propagate(errp, local_err);
855 node_name = qemu_opt_get(opts, "node-name");
856 bdrv_assign_node_name(bs, node_name, &local_err);
858 error_propagate(errp, local_err);
/* Defaults that are in place before the driver's open callback runs */
863 bs->guest_block_size = 512;
864 bs->request_alignment = 512;
865 bs->zero_beyond_eof = true;
866 open_flags = bdrv_open_flags(bs, flags);
867 bs->read_only = !(open_flags & BDRV_O_RDWR);
/* With the whitelist enabled the driver must be allowed for this access
 * mode; the message distinguishes drivers that are read-only-only */
869 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
871 !bs->read_only && bdrv_is_whitelisted(drv, true)
872 ? "Driver '%s' can only be used for read-only devices"
873 : "Driver '%s' is not whitelisted",
879 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
/* Copy-on-read is only enabled on a writable node */
880 if (flags & BDRV_O_COPY_ON_READ) {
881 if (!bs->read_only) {
882 bdrv_enable_copy_on_read(bs);
884 error_setg(errp, "Can't use copy-on-read on read-only device");
/* Record the file name (or an empty string) and mirror it into
 * exact_filename */
890 if (filename != NULL) {
891 pstrcpy(bs->filename, sizeof(bs->filename), filename);
893 bs->filename[0] = '\0';
895 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
/* Zero-filled driver-private state of drv->instance_size bytes */
898 bs->opaque = g_malloc0(drv->instance_size);
900 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
902 /* Open the image, either directly or using a protocol */
903 if (drv->bdrv_file_open) {
904 assert(file == NULL);
905 assert(!drv->bdrv_needs_filename || filename != NULL);
906 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
909 error_setg(errp, "Can't use '%s' as a block driver for the "
910 "protocol level", drv->format_name);
915 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
/* Error reporting: prefer the driver's own error object, otherwise build
 * a message that includes the file name when one is known */
920 error_propagate(errp, local_err);
921 } else if (bs->filename[0]) {
922 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
924 error_setg_errno(errp, -ret, "Could not open image");
930 error_report("Encrypted images are deprecated");
931 error_printf("Support for them will be removed in a future release.\n"
932 "You can use 'qemu-img convert' to convert your image"
933 " to an unencrypted one.\n");
/* The current total_sectors value serves as the hint */
936 ret = refresh_total_sectors(bs, bs->total_sectors);
938 error_setg_errno(errp, -ret, "Could not refresh total sector count");
942 bdrv_refresh_limits(bs, &local_err);
944 error_propagate(errp, local_err);
/* Sanity checks on the resulting alignment values */
949 assert(bdrv_opt_mem_align(bs) != 0);
950 assert(bdrv_min_mem_align(bs) != 0);
951 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
/*
 * Turn a "json:" pseudo-filename into a flattened options QDict. Text that
 * is not valid JSON, or JSON that is not a dictionary, is reported through
 * @errp.
 */
966 static QDict *parse_json_filename(const char *filename, Error **errp)
968 QObject *options_obj;
/* strstart() advances filename past the "json:" prefix */
972 ret = strstart(filename, "json:", &filename);
975 options_obj = qobject_from_json(filename);
977 error_setg(errp, "Could not parse the JSON options");
/* only a JSON object (dictionary) is acceptable; drop anything else */
981 if (qobject_type(options_obj) != QTYPE_QDICT) {
982 qobject_decref(options_obj);
983 error_setg(errp, "Invalid JSON object given");
987 options = qobject_to_qdict(options_obj);
988 qdict_flatten(options);
994 * Fills in default options for opening images and converts the legacy
995 * filename/flags pair to option QDict entries.
996 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
997 * block driver has been specified explicitly.
999 static int bdrv_fill_options(QDict **options, const char **pfilename,
1000 int *flags, BlockDriver *drv, Error **errp)
1002 const char *filename = *pfilename;
1003 const char *drvname;
1004 bool protocol = *flags & BDRV_O_PROTOCOL;
1005 bool parse_filename = false;
1006 BlockDriver *tmp_drv;
1007 Error *local_err = NULL;
1009 /* Parse json: pseudo-protocol */
1010 if (filename && g_str_has_prefix(filename, "json:")) {
1011 QDict *json_options = parse_json_filename(filename, &local_err);
1013 error_propagate(errp, local_err);
1017 /* Options given in the filename have lower priority than options
1018 * specified directly */
1019 qdict_join(*options, json_options, false);
1020 QDECREF(json_options);
/* the json: string has been consumed; the caller's filename is cleared */
1021 *pfilename = filename = NULL;
1024 drvname = qdict_get_try_str(*options, "driver");
1026 /* If the user has explicitly specified the driver, this choice should
1027 * override the BDRV_O_PROTOCOL flag */
1029 if (!tmp_drv && drvname) {
1030 tmp_drv = bdrv_find_format(drvname);
/* a driver that has a bdrv_file_open callback counts as a protocol */
1033 protocol = tmp_drv->bdrv_file_open;
1037 *flags |= BDRV_O_PROTOCOL;
1039 *flags &= ~BDRV_O_PROTOCOL;
1042 /* Fetch the file name from the options QDict if necessary */
1043 if (protocol && filename) {
1044 if (!qdict_haskey(*options, "filename")) {
1045 qdict_put(*options, "filename", qstring_from_str(filename));
/* the name came from the legacy argument, so driver-specific filename
 * parsing is allowed for it below */
1046 parse_filename = true;
1048 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1054 /* Find the right block driver */
1055 filename = qdict_get_try_str(*options, "filename");
1059 error_setg(errp, "Driver specified twice");
/* record the driver name in the options */
1062 drvname = drv->format_name;
1063 qdict_put(*options, "driver", qstring_from_str(drvname));
/* no driver name given at protocol level: derive the driver from the
 * file name */
1065 if (!drvname && protocol) {
1067 drv = bdrv_find_protocol(filename, parse_filename, errp);
1072 drvname = drv->format_name;
1073 qdict_put(*options, "driver", qstring_from_str(drvname));
1075 error_setg(errp, "Must specify either driver or file");
1078 } else if (drvname) {
1079 drv = bdrv_find_format(drvname);
1081 error_setg(errp, "Unknown driver '%s'", drvname);
1087 assert(drv || !protocol);
1089 /* Driver-specific filename parsing */
1090 if (drv && drv->bdrv_parse_filename && parse_filename) {
1091 drv->bdrv_parse_filename(filename, *options, &local_err);
1093 error_propagate(errp, local_err);
/* drivers that do not need the raw file name get it removed from the
 * options */
1097 if (!drv->bdrv_needs_filename) {
1098 qdict_del(*options, "filename");
/* Allocate a BdrvChild and insert it at the head of the children list of
 * @parent_bs. */
1105 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1106 BlockDriverState *child_bs,
1107 const BdrvChildRole *child_role)
/* g_new() does not zero the memory; the compound literal assignment below
 * initialises the whole struct */
1109 BdrvChild *child = g_new(BdrvChild, 1);
1110 *child = (BdrvChild) {
1115 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
/*
 * Install @backing_hd as the backing BDS of @bs. The blocker error object
 * bs->backing_blocker is used to block operations on the backing BDS while
 * it is in use; BLOCK_OP_TYPE_COMMIT_TARGET is left unblocked.
 */
1120 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
/* An existing backing BDS is unblocked first; otherwise, when a new one is
 * supplied, the blocker object is created */
1123 if (bs->backing_hd) {
1124 assert(bs->backing_blocker);
1125 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1126 } else if (backing_hd) {
1127 error_setg(&bs->backing_blocker,
1128 "node is used as backing hd of '%s'",
1129 bdrv_get_device_or_node_name(bs));
1132 bs->backing_hd = backing_hd;
/* releasing the blocker object */
1134 error_free(bs->backing_blocker);
1135 bs->backing_blocker = NULL;
/* Record the new backing file name and format name in @bs */
1138 bs->open_flags &= ~BDRV_O_NO_BACKING;
1139 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1140 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1141 backing_hd->drv ? backing_hd->drv->format_name : "");
1143 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1144 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1145 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1146 bs->backing_blocker);
1148 bdrv_refresh_limits(bs, NULL);
1152 * Opens the backing file for a BlockDriverState if not yet open
1154 * options is a QDict of options to pass to the block drivers, or NULL for an
1155 * empty set of options. The reference to the QDict is transferred to this
1156 * function (even on failure), so if the caller intends to reuse the dictionary,
1157 * it needs to use QINCREF() before calling bdrv_open_backing_file.
1159 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
/* heap buffer of PATH_MAX bytes; released with g_free() at the end */
1161 char *backing_filename = g_malloc0(PATH_MAX);
1163 BlockDriverState *backing_hd;
1164 Error *local_err = NULL;
/* a backing BDS that is already attached is handled separately */
1166 if (bs->backing_hd != NULL) {
1171 /* NULL means an empty set of options */
1172 if (options == NULL) {
1173 options = qdict_new();
1176 bs->open_flags &= ~BDRV_O_NO_BACKING;
/* Three cases: the options carry their own file name, there is neither a
 * recorded backing file nor any option, or the recorded backing file name
 * is resolved relative to @bs */
1177 if (qdict_haskey(options, "file.filename")) {
1178 backing_filename[0] = '\0';
1179 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1183 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1187 error_propagate(errp, local_err);
1193 if (!bs->drv || !bs->drv->supports_backing) {
1195 error_setg(errp, "Driver doesn't support backing files");
1200 backing_hd = bdrv_new();
/* the recorded backing format is used unless a driver was given */
1202 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1203 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1206 assert(bs->backing_hd == NULL);
/* flags are passed as 0 and the child_backing role is supplied */
1207 ret = bdrv_open_inherit(&backing_hd,
1208 *backing_filename ? backing_filename : NULL,
1209 NULL, options, 0, bs, &child_backing,
/* on failure: drop the new BDS, set BDRV_O_NO_BACKING again and wrap the
 * error message */
1212 bdrv_unref(backing_hd);
1214 bs->open_flags |= BDRV_O_NO_BACKING;
1215 error_setg(errp, "Could not open backing file: %s",
1216 error_get_pretty(local_err));
1217 error_free(local_err);
1221 bdrv_attach_child(bs, backing_hd, &child_backing);
1222 bdrv_set_backing_hd(bs, backing_hd);
1225 g_free(backing_filename);
1230 * Opens a disk image whose options are given as BlockdevRef in another block
1233 * If allow_none is true, no image will be opened if filename is NULL and no
1234 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1236 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1237 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1238 * itself, all options starting with "${bdref_key}." are considered part of the
1241 * The BlockdevRef will be removed from the options QDict.
1243 BdrvChild *bdrv_open_child(const char *filename,
1244 QDict *options, const char *bdref_key,
1245 BlockDriverState* parent,
1246 const BdrvChildRole *child_role,
1247 bool allow_none, Error **errp)
1249 BdrvChild *c = NULL;
1250 BlockDriverState *bs;
1251 QDict *image_options;
1253 char *bdref_key_dot;
1254 const char *reference;
1256 assert(child_role != NULL);
/* Split every "<bdref_key>."-prefixed entry off into image_options */
1258 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1259 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1260 g_free(bdref_key_dot);
/* a plain string stored under bdref_key is a reference */
1262 reference = qdict_get_try_str(options, bdref_key);
/* Nothing to open: no file name, no reference and no sub-options */
1263 if (!filename && !reference && !qdict_size(image_options)) {
1265 error_setg(errp, "A block device must be specified for \"%s\"",
1268 QDECREF(image_options);
1273 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1274 parent, child_role, NULL, errp);
1279 c = bdrv_attach_child(parent, bs, child_role);
/* remove the BlockdevRef entry itself from the options */
1282 qdict_del(options, bdref_key);
1287 * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of
1288 * a BdrvChild object.
1290 * If allow_none is true, no image will be opened if filename is NULL and no
1291 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1293 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1295 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1296 QDict *options, const char *bdref_key,
1297 BlockDriverState* parent, const BdrvChildRole *child_role,
1298 bool allow_none, Error **errp)
1300 Error *local_err = NULL;
1304 assert(*pbs == NULL);
/* All the work is done by bdrv_open_child(); its error, if any, is
 * forwarded to @errp */
1306 c = bdrv_open_child(filename, options, bdref_key, parent, child_role,
1307 allow_none, &local_err);
1309 error_propagate(errp, local_err);
/*
 * Put a temporary qcow2 overlay on top of @bs: create a temporary file of
 * the same size as @bs, open it with the qcow2 driver using @flags, and
 * pass it to bdrv_append() together with @bs. Failures are reported
 * through @errp.
 */
1320 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1322 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1323 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1325 QemuOpts *opts = NULL;
1326 QDict *snapshot_options;
1327 BlockDriverState *bs_snapshot;
1328 Error *local_err = NULL;
1331 /* if snapshot, we create a temporary backing file and open it
1332 instead of opening 'filename' directly */
1334 /* Get the required size from the image */
1335 total_size = bdrv_getlength(bs);
1336 if (total_size < 0) {
1338 error_setg_errno(errp, -total_size, "Could not get image size");
1342 /* Create the temporary image */
1343 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1345 error_setg_errno(errp, -ret, "Could not get temporary filename");
/* qcow2 creation options with the size set to that of @bs; the options
 * are deleted as soon as bdrv_create() has returned */
1349 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1351 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1352 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1353 qemu_opts_del(opts);
1355 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1356 "'%s': %s", tmp_filename,
1357 error_get_pretty(local_err));
1358 error_free(local_err);
1362 /* Prepare a new options QDict for the temporary file */
1363 snapshot_options = qdict_new();
1364 qdict_put(snapshot_options, "file.driver",
1365 qstring_from_str("file"));
1366 qdict_put(snapshot_options, "file.filename",
1367 qstring_from_str(tmp_filename));
1369 bs_snapshot = bdrv_new();
/* open the overlay with an explicit qcow2 driver and the caller's flags */
1371 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1372 flags, &bdrv_qcow2, &local_err);
1374 error_propagate(errp, local_err);
1378 bdrv_append(bs_snapshot, bs);
1381 g_free(tmp_filename);
1386 * Opens a disk image (raw, qcow2, vmdk, ...)
1388 * options is a QDict of options to pass to the block drivers, or NULL for an
1389 * empty set of options. The reference to the QDict belongs to the block layer
1390 * after the call (even on failure), so if the caller intends to reuse the
1391 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1393 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1394 * If it is not NULL, the referenced BDS will be reused.
1396 * The reference parameter may be used to specify an existing block device which
1397 * should be opened. If specified, neither options nor a filename may be given,
1398 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
/*
 * Core implementation behind bdrv_open().
 *
 * In addition to the bdrv_open() arguments it takes an optional @parent node
 * and @child_role.  When a child role is in effect the open flags are not
 * taken from @flags but derived from the parent's open_flags through
 * child_role->inherit_flags().
 *
 * Visible stages: resolve a @reference via bdrv_lookup_bs(); normalise the
 * options with bdrv_fill_options(); pick the driver from the "driver" option;
 * open the "file" child and probe the format for non-protocol nodes; call
 * bdrv_open_common(); open the backing file; optionally put a temporary
 * snapshot on top; reject left-over options; handle encryption keys.
 *
 * NOTE(review): several short lines (local declarations, braces, returns,
 * goto labels) are not visible in this excerpt, so the exact conditions
 * guarding some statements below should be confirmed against the full file.
 */
1400 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1401 const char *reference, QDict *options, int flags,
1402 BlockDriverState *parent,
1403 const BdrvChildRole *child_role,
1404 BlockDriver *drv, Error **errp)
1407 BlockDriverState *file = NULL, *bs;
1408 const char *drvname;
1409 Error *local_err = NULL;
1410 int snapshot_flags = 0;
/* A child role and explicit flags are mutually exclusive, and a child role
 * is given exactly when a parent is given. */
1413 assert(!child_role || !flags);
1414 assert(!child_role == !parent);
1417 bool options_non_empty = options ? qdict_size(options) : false;
1421 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1422 "another block device");
1426 if (filename || options_non_empty) {
1427 error_setg(errp, "Cannot reference an existing block device with "
1428 "additional options or a new filename");
/* The reference string is tried both as a device name and as a node name */
1432 bs = bdrv_lookup_bs(reference, reference, errp);
1447 /* NULL means an empty set of options */
1448 if (options == NULL) {
1449 options = qdict_new();
1453 bs->inherits_from = parent;
1454 flags = child_role->inherit_flags(parent->open_flags);
1457 ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err);
1462 /* Find the right image format driver */
1464 drvname = qdict_get_try_str(options, "driver");
1466 drv = bdrv_find_format(drvname);
1467 qdict_del(options, "driver");
1469 error_setg(errp, "Unknown driver: '%s'", drvname);
1475 assert(drvname || !(flags & BDRV_O_PROTOCOL));
/* bs keeps the original dictionary; the rest of the function consumes a
 * shallow clone of it. */
1477 bs->open_flags = flags;
1478 bs->options = options;
1479 options = qdict_clone_shallow(options);
1481 /* Open image file without format layer */
1482 if ((flags & BDRV_O_PROTOCOL) == 0) {
1483 if (flags & BDRV_O_RDWR) {
1484 flags |= BDRV_O_ALLOW_RDWR;
1486 if (flags & BDRV_O_SNAPSHOT) {
1487 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1488 flags = bdrv_backing_flags(flags);
1491 assert(file == NULL);
1492 bs->open_flags = flags;
1493 ret = bdrv_open_image(&file, filename, options, "file",
1494 bs, &child_file, true, &local_err);
1500 /* Image format probing */
1503 ret = find_image_format(file, filename, &drv, &local_err);
1508 error_setg(errp, "Must specify either driver or file");
1513 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1514 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1515 /* file must be NULL if a protocol BDS is about to be created
1516 * (the inverse results in an error message from bdrv_open_common()) */
1517 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1519 /* Open the image */
1520 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1525 if (file && (bs->file != file)) {
1530 /* If there is a backing file, use it */
1531 if ((flags & BDRV_O_NO_BACKING) == 0) {
1532 QDict *backing_options;
1534 qdict_extract_subqdict(options, &backing_options, "backing.");
1535 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1537 goto close_and_fail;
1541 bdrv_refresh_filename(bs);
1543 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1544 * temporary snapshot afterwards. */
1545 if (snapshot_flags) {
1546 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1548 goto close_and_fail;
1552 /* Check if any unknown options were used */
1553 if (options && (qdict_size(options) != 0)) {
1554 const QDictEntry *entry = qdict_first(options);
1555 if (flags & BDRV_O_PROTOCOL) {
1556 error_setg(errp, "Block protocol '%s' doesn't support the option "
1557 "'%s'", drv->format_name, entry->key);
1559 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1560 "support the option '%s'", drv->format_name,
1561 bdrv_get_device_name(bs), entry->key);
1565 goto close_and_fail;
/* Without a pending key the media-change callback can run right away;
 * otherwise opening is refused unless the run state is one of those
 * checked below. */
1568 if (!bdrv_key_required(bs)) {
1570 blk_dev_change_media_cb(bs->blk, true);
1572 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1573 && !runstate_check(RUN_STATE_INMIGRATE)
1574 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1576 "Guest must be stopped for opening of encrypted image");
1578 goto close_and_fail;
1589 QDECREF(bs->options);
1593 /* If *pbs is NULL, a new BDS has been created in this function and
1594 needs to be freed now. Otherwise, it does not need to be closed,
1595 since it has not really been opened yet. */
1599 error_propagate(errp, local_err);
1604 /* See fail path, but now the BDS has to be always closed */
1612 error_propagate(errp, local_err);
1617 int bdrv_open(BlockDriverState **pbs, const char *filename,
1618 const char *reference, QDict *options, int flags,
1619 BlockDriver *drv, Error **errp)
1621 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1625 typedef struct BlockReopenQueueEntry {
1627 BDRVReopenState state;
1628 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1629 } BlockReopenQueueEntry;
1632 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1633 * reopen of multiple devices.
1635 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT
1636 * already performed, or alternatively may be NULL, in which case a new BlockReopenQueue will
1637 * be created and initialized. This newly created BlockReopenQueue should be
1638 * passed back in for subsequent calls that are intended to be of the same
1641 * bs is the BlockDriverState to add to the reopen queue.
1643 * flags contains the open flags for the associated bs
1645 * returns a pointer to bs_queue, which is either the newly allocated
1646 * bs_queue, or the existing bs_queue being used.
1649 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1650 BlockDriverState *bs, int flags)
1654 BlockReopenQueueEntry *bs_entry;
1657 if (bs_queue == NULL) {
1658 bs_queue = g_new0(BlockReopenQueue, 1);
1659 QSIMPLEQ_INIT(bs_queue);
1662 /* bdrv_open() masks this flag out */
1663 flags &= ~BDRV_O_PROTOCOL;
1665 QLIST_FOREACH(child, &bs->children, next) {
1668 if (child->bs->inherits_from != bs) {
1672 child_flags = child->role->inherit_flags(flags);
1673 bdrv_reopen_queue(bs_queue, child->bs, child_flags);
1676 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1677 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1679 bs_entry->state.bs = bs;
1680 bs_entry->state.flags = flags;
1686 * Reopen multiple BlockDriverStates atomically & transactionally.
1688 * The queue passed in (bs_queue) must have been built up previously
1689 * via bdrv_reopen_queue().
1691 * Reopens all BDS specified in the queue, with the appropriate
1692 * flags. All devices are prepared for reopen, and failure of any
1693 * device will cause all device changes to be abandoned, and intermediate
1696 * If all devices prepare successfully, then the changes are committed
1700 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1703 BlockReopenQueueEntry *bs_entry, *next;
1704 Error *local_err = NULL;
1706 assert(bs_queue != NULL);
1710 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1711 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1712 error_propagate(errp, local_err);
1715 bs_entry->prepared = true;
1718 /* If we reach this point, we have success and just need to apply the
1721 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1722 bdrv_reopen_commit(&bs_entry->state);
1728 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1729 if (ret && bs_entry->prepared) {
1730 bdrv_reopen_abort(&bs_entry->state);
1739 /* Reopen a single BlockDriverState with the specified flags. */
1740 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1743 Error *local_err = NULL;
1744 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1746 ret = bdrv_reopen_multiple(queue, &local_err);
1747 if (local_err != NULL) {
1748 error_propagate(errp, local_err);
1755 * Prepares a BlockDriverState for reopen. All changes are staged in the
1756 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1757 * the block driver layer .bdrv_reopen_prepare()
1759 * bs is the BlockDriverState to reopen
1760 * flags are the new open flags
1761 * queue is the reopen queue
1763 * Returns 0 on success, non-zero on error. On error errp will be set
1766 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1767 * It is the responsibility of the caller to then call the abort() or
1768 * commit() for any other BDS that have been left in a prepare() state
1771 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1775 Error *local_err = NULL;
1778 assert(reopen_state != NULL);
1779 assert(reopen_state->bs->drv != NULL);
1780 drv = reopen_state->bs->drv;
1782 /* if we are to stay read-only, do not allow permission change
1784 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1785 reopen_state->flags & BDRV_O_RDWR) {
1786 error_setg(errp, "Node '%s' is read only",
1787 bdrv_get_device_or_node_name(reopen_state->bs));
1792 ret = bdrv_flush(reopen_state->bs);
1794 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1799 if (drv->bdrv_reopen_prepare) {
1800 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1802 if (local_err != NULL) {
1803 error_propagate(errp, local_err);
1805 error_setg(errp, "failed while preparing to reopen image '%s'",
1806 reopen_state->bs->filename);
1811 /* It is currently mandatory to have a bdrv_reopen_prepare()
1812 * handler for each supported drv. */
1813 error_setg(errp, "Block format '%s' used by node '%s' "
1814 "does not support reopening files", drv->format_name,
1815 bdrv_get_device_or_node_name(reopen_state->bs));
1827 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1828 * makes them final by swapping the staging BlockDriverState contents into
1829 * the active BlockDriverState contents.
1831 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1835 assert(reopen_state != NULL);
1836 drv = reopen_state->bs->drv;
1837 assert(drv != NULL);
1839 /* If there are any driver level actions to take */
1840 if (drv->bdrv_reopen_commit) {
1841 drv->bdrv_reopen_commit(reopen_state);
1844 /* set BDS specific flags now */
1845 reopen_state->bs->open_flags = reopen_state->flags;
1846 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1848 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1850 bdrv_refresh_limits(reopen_state->bs, NULL);
1854 * Abort the reopen, and delete and free the staged changes in
1857 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1861 assert(reopen_state != NULL);
1862 drv = reopen_state->bs->drv;
1863 assert(drv != NULL);
1865 if (drv->bdrv_reopen_abort) {
1866 drv->bdrv_reopen_abort(reopen_state);
1871 void bdrv_close(BlockDriverState *bs)
1873 BdrvAioNotifier *ban, *ban_next;
1876 block_job_cancel_sync(bs->job);
1878 bdrv_drain(bs); /* complete I/O */
1880 bdrv_drain(bs); /* in case flush left pending I/O */
1881 notifier_list_notify(&bs->close_notifiers, bs);
1884 BdrvChild *child, *next;
1886 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1887 if (child->bs->inherits_from == bs) {
1888 child->bs->inherits_from = NULL;
1890 QLIST_REMOVE(child, next);
1894 if (bs->backing_hd) {
1895 BlockDriverState *backing_hd = bs->backing_hd;
1896 bdrv_set_backing_hd(bs, NULL);
1897 bdrv_unref(backing_hd);
1899 bs->drv->bdrv_close(bs);
1903 bs->copy_on_read = 0;
1904 bs->backing_file[0] = '\0';
1905 bs->backing_format[0] = '\0';
1906 bs->total_sectors = 0;
1910 bs->zero_beyond_eof = false;
1911 QDECREF(bs->options);
1913 QDECREF(bs->full_open_options);
1914 bs->full_open_options = NULL;
1916 if (bs->file != NULL) {
1917 bdrv_unref(bs->file);
1923 blk_dev_change_media_cb(bs->blk, false);
1926 /*throttling disk I/O limits*/
1927 if (bs->io_limits_enabled) {
1928 bdrv_io_limits_disable(bs);
1931 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1934 QLIST_INIT(&bs->aio_notifiers);
1937 void bdrv_close_all(void)
1939 BlockDriverState *bs;
1941 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1942 AioContext *aio_context = bdrv_get_aio_context(bs);
1944 aio_context_acquire(aio_context);
1946 aio_context_release(aio_context);
1950 /* make a BlockDriverState anonymous by removing from bdrv_state and
1951 * graph_bdrv_state list.
1952 Also, NULL terminate the device_name to prevent double remove */
1953 void bdrv_make_anon(BlockDriverState *bs)
1956 * Take care to remove bs from bdrv_states only when it's actually
1957 * in it. Note that bs->device_list.tqe_prev is initially null,
1958 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1959 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1960 * resetting it to null on remove.
1962 if (bs->device_list.tqe_prev) {
1963 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1964 bs->device_list.tqe_prev = NULL;
1966 if (bs->node_name[0] != '\0') {
1967 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1969 bs->node_name[0] = '\0';
1972 static void bdrv_rebind(BlockDriverState *bs)
1974 if (bs->drv && bs->drv->bdrv_rebind) {
1975 bs->drv->bdrv_rebind(bs);
1979 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1980 BlockDriverState *bs_src)
1982 /* move some fields that need to stay attached to the device */
1985 bs_dest->guest_block_size = bs_src->guest_block_size;
1986 bs_dest->copy_on_read = bs_src->copy_on_read;
1988 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1990 /* i/o throttled req */
1991 bs_dest->throttle_state = bs_src->throttle_state,
1992 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1993 bs_dest->pending_reqs[0] = bs_src->pending_reqs[0];
1994 bs_dest->pending_reqs[1] = bs_src->pending_reqs[1];
1995 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1996 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
1997 memcpy(&bs_dest->round_robin,
1998 &bs_src->round_robin,
1999 sizeof(bs_dest->round_robin));
2000 memcpy(&bs_dest->throttle_timers,
2001 &bs_src->throttle_timers,
2002 sizeof(ThrottleTimers));
2005 bs_dest->on_read_error = bs_src->on_read_error;
2006 bs_dest->on_write_error = bs_src->on_write_error;
2009 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
2010 bs_dest->iostatus = bs_src->iostatus;
2013 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
2015 /* reference count */
2016 bs_dest->refcnt = bs_src->refcnt;
2019 bs_dest->job = bs_src->job;
2021 /* keep the same entry in bdrv_states */
2022 bs_dest->device_list = bs_src->device_list;
2023 bs_dest->blk = bs_src->blk;
2025 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2026 sizeof(bs_dest->op_blockers));
2030 * Swap bs contents for two image chains while they are live,
2031 * while keeping required fields on the BlockDriverState that is
2032 * actually attached to a device.
2034 * This will modify the BlockDriverState fields, and swap contents
2035 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2037 * bs_new must not be attached to a BlockBackend.
2039 * This function does not create any image files.
/*
 * Exchange the contents of @bs_new and @bs_old.
 *
 * Visible steps: both nodes are taken off graph_bdrv_states while they have
 * a node name; the throttle group lock is taken when bs_old belongs to a
 * group; bs_new is asserted to be unattached (no BlockBackend, dirty
 * bitmaps, job or throttling); the device-bound fields are rotated back
 * with three bdrv_move_feature_fields() calls through a temporary; the
 * nodes are re-inserted into graph_bdrv_states; the children list heads and
 * the children's inherits_from pointers are fixed up; both drivers are
 * finally offered a rebind.
 *
 * NOTE(review): the statements that perform the wholesale structure swap
 * and the initial draining are not visible in this excerpt -- confirm
 * against the full file.
 */
2041 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2043 BlockDriverState tmp;
2049 /* The code needs to swap the node_name but simply swapping node_list won't
2050 * work so first remove the nodes from the graph list, do the swap then
2051 * insert them back if needed.
2053 if (bs_new->node_name[0] != '\0') {
2054 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2056 if (bs_old->node_name[0] != '\0') {
2057 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2060 /* If the BlockDriverState is part of a throttling group acquire
2061 * its lock since we're going to mess with the protected fields.
2062 * Otherwise there's no need to worry since no one else can touch
2064 if (bs_old->throttle_state) {
2065 throttle_group_lock(bs_old);
2068 /* bs_new must be unattached and shouldn't have anything fancy enabled */
2069 assert(!bs_new->blk);
2070 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
2071 assert(bs_new->job == NULL);
2072 assert(bs_new->io_limits_enabled == false);
2073 assert(bs_new->throttle_state == NULL);
2074 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
2080 /* there are some fields that should not be swapped, move them back */
2081 bdrv_move_feature_fields(&tmp, bs_old);
2082 bdrv_move_feature_fields(bs_old, bs_new);
2083 bdrv_move_feature_fields(bs_new, &tmp);
2085 /* bs_new must remain unattached */
2086 assert(!bs_new->blk);
2088 /* Check a few fields that should remain attached to the device */
2089 assert(bs_new->job == NULL);
2090 assert(bs_new->io_limits_enabled == false);
2091 assert(bs_new->throttle_state == NULL);
2092 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
2094 /* Release the ThrottleGroup lock */
2095 if (bs_old->throttle_state) {
2096 throttle_group_unlock(bs_old);
2099 /* insert the nodes back into the graph node list if needed */
2100 if (bs_new->node_name[0] != '\0') {
2101 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2103 if (bs_old->node_name[0] != '\0') {
2104 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2108 * Update lh_first.le_prev for non-empty lists.
2110 * The head of the op blocker list doesn't change because it is moved back
2111 * in bdrv_move_feature_fields().
2113 assert(QLIST_EMPTY(&bs_old->tracked_requests));
2114 assert(QLIST_EMPTY(&bs_new->tracked_requests));
/* Both children lists get their head pointer fixed up */
2116 QLIST_FIX_HEAD_PTR(&bs_new->children, next);
2117 QLIST_FIX_HEAD_PTR(&bs_old->children, next);
2119 /* Update references in bs->opaque and children */
2120 QLIST_FOREACH(child, &bs_old->children, next) {
2121 if (child->bs->inherits_from == bs_new) {
2122 child->bs->inherits_from = bs_old;
2125 QLIST_FOREACH(child, &bs_new->children, next) {
2126 if (child->bs->inherits_from == bs_old) {
2127 child->bs->inherits_from = bs_new;
/* Let each driver react through its optional bdrv_rebind() callback */
2131 bdrv_rebind(bs_new);
2132 bdrv_rebind(bs_old);
2136 * Add new bs contents at the top of an image chain while the chain is
2137 * live, while keeping required fields on the top layer.
2139 * This will modify the BlockDriverState fields, and swap contents
2140 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2142 * bs_new must not be attached to a BlockBackend.
2144 * This function does not create any image files.
2146 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2148 bdrv_swap(bs_new, bs_top);
2150 /* The contents of 'tmp' will become bs_top, as we are
2151 * swapping bs_new and bs_top contents. */
2152 bdrv_set_backing_hd(bs_top, bs_new);
2153 bdrv_attach_child(bs_top, bs_new, &child_backing);
2156 static void bdrv_delete(BlockDriverState *bs)
2159 assert(bdrv_op_blocker_is_empty(bs));
2160 assert(!bs->refcnt);
2161 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2165 /* remove from list, if necessary */
2172 * Run consistency checks on an image
2174 * Returns 0 if the check could be completed (it doesn't mean that the image is
2175 * free of errors) or -errno when an internal error occurred. The results of the
2176 * check are stored in res.
2178 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2180 if (bs->drv == NULL) {
2183 if (bs->drv->bdrv_check == NULL) {
2187 memset(res, 0, sizeof(*res));
2188 return bs->drv->bdrv_check(bs, res, fix);
2191 #define COMMIT_BUF_SECTORS 2048
2193 /* commit COW file into the raw image */
/*
 * Copy the data allocated in @bs into its backing file.
 *
 * Visible steps: refuse when there is no backing file or when the commit
 * source/target operations are blocked; reopen the backing file with
 * BDRV_O_RDWR added; grow the backing file when @bs is longer; copy every
 * allocated range in chunks of at most COMMIT_BUF_SECTORS sectors; ask the
 * driver to empty @bs when it implements bdrv_make_empty(); flush the
 * backing file; finally reopen the backing file without BDRV_O_RDWR.
 *
 * NOTE(review): the declaration of ret, the error returns, the cleanup
 * label and the conditions guarding the two bdrv_reopen() calls are not
 * visible in this excerpt -- presumably the reopen only happens when the
 * backing file was read-only (see 'ro'); confirm against the full file.
 */
2194 int bdrv_commit(BlockDriverState *bs)
2196 BlockDriver *drv = bs->drv;
2197 int64_t sector, total_sectors, length, backing_length;
2198 int n, ro, open_flags;
2200 uint8_t *buf = NULL;
2205 if (!bs->backing_hd) {
2209 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2210 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
/* Remember how the backing file was opened so it can be restored later */
2214 ro = bs->backing_hd->read_only;
2215 open_flags = bs->backing_hd->open_flags;
2218 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2223 length = bdrv_getlength(bs);
2229 backing_length = bdrv_getlength(bs->backing_hd);
2230 if (backing_length < 0) {
2231 ret = backing_length;
2235 /* If our top snapshot is larger than the backing file image,
2236 * grow the backing file image if possible. If not possible,
2237 * we must return an error */
2238 if (length > backing_length) {
2239 ret = bdrv_truncate(bs->backing_hd, length);
2245 total_sectors = length >> BDRV_SECTOR_BITS;
2247 /* qemu_try_blockalign() for bs will choose an alignment that works for
2248 * bs->backing_hd as well, so no need to compare the alignment manually. */
2249 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
/* n is set by bdrv_is_allocated() and is the step of the loop */
2255 for (sector = 0; sector < total_sectors; sector += n) {
2256 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2261 ret = bdrv_read(bs, sector, buf, n);
2266 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2273 if (drv->bdrv_make_empty) {
2274 ret = drv->bdrv_make_empty(bs);
2282 * Make sure all data we wrote to the backing device is actually
2285 if (bs->backing_hd) {
2286 bdrv_flush(bs->backing_hd);
2294 /* ignoring error return here */
2295 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2301 int bdrv_commit_all(void)
2303 BlockDriverState *bs;
2305 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2306 AioContext *aio_context = bdrv_get_aio_context(bs);
2308 aio_context_acquire(aio_context);
2309 if (bs->drv && bs->backing_hd) {
2310 int ret = bdrv_commit(bs);
2312 aio_context_release(aio_context);
2316 aio_context_release(aio_context);
2324 * -EINVAL - backing format specified, but no file
2325 * -ENOSPC - can't update the backing file because no space is left in the
2327 * -ENOTSUP - format driver doesn't support changing the backing file
2329 int bdrv_change_backing_file(BlockDriverState *bs,
2330 const char *backing_file, const char *backing_fmt)
2332 BlockDriver *drv = bs->drv;
2335 /* Backing file format doesn't make sense without a backing file */
2336 if (backing_fmt && !backing_file) {
2340 if (drv->bdrv_change_backing_file != NULL) {
2341 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2347 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2348 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2354 * Finds the image layer in the chain that has 'bs' as its backing file.
2356 * active is the current topmost image.
2358 * Returns NULL if bs is not found in active's image chain,
2359 * or if active == bs.
2361 * Returns the bottommost base image if bs == NULL.
2363 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2364 BlockDriverState *bs)
2366 while (active && bs != active->backing_hd) {
2367 active = active->backing_hd;
2373 /* Given a BDS, searches for the base layer. */
2374 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2376 return bdrv_find_overlay(bs, NULL);
/*
 * Queue element used by bdrv_drop_intermediate() to remember one BDS that
 * is going to be dropped from the chain.
 */
2379 typedef struct BlkIntermediateStates {
2380 BlockDriverState *bs;
2381 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2382 } BlkIntermediateStates;
2386 * Drops images above 'base' up to and including 'top', and sets the image
2387 * above 'top' to have base as its backing file.
2389 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2390 * information in 'bs' can be properly updated.
2392 * E.g., this will convert the following chain:
2393 * bottom <- base <- intermediate <- top <- active
2397 * bottom <- base <- active
2399 * It is allowed for bottom==base, in which case it converts:
2401 * base <- intermediate <- top <- active
2407 * If backing_file_str is non-NULL, it will be used when modifying top's
2408 * overlay image metadata.
2411 * if active == top, that is considered an error
/*
 * Remove the images from @top down to (but excluding) @base from the chain
 * headed by @active, and make the overlay of @top use @base as its backing
 * file.
 *
 * Visible steps: find the overlay of @top with bdrv_find_overlay(); collect
 * every node from @top downwards in a local queue until the node whose
 * backing file is @base is reached; rewrite the overlay's backing file
 * metadata via bdrv_change_backing_file() (using @backing_file_str, or
 * base's filename when it is NULL); relink the overlay to @base; detach and
 * unref each collected node; free the queue elements.
 *
 * NOTE(review): the declaration and initial value of ret, the goto/return
 * statements and the initialisation of 'intermediate' are not visible in
 * this excerpt -- confirm the exact error codes against the full file.
 */
2414 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2415 BlockDriverState *base, const char *backing_file_str)
2417 BlockDriverState *intermediate;
2418 BlockDriverState *base_bs = NULL;
2419 BlockDriverState *new_top_bs = NULL;
2420 BlkIntermediateStates *intermediate_state, *next;
2423 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2424 QSIMPLEQ_INIT(&states_to_delete);
2426 if (!top->drv || !base->drv) {
/* new_top_bs is the image directly above 'top' */
2430 new_top_bs = bdrv_find_overlay(active, top);
2432 if (new_top_bs == NULL) {
2433 /* we could not find the image above 'top', this is an error */
2437 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2438 * to do, no intermediate images */
2439 if (new_top_bs->backing_hd == base) {
2446 /* now we will go down through the list, and add each BDS we find
2447 * into our deletion queue, until we hit the 'base'
2449 while (intermediate) {
2450 intermediate_state = g_new0(BlkIntermediateStates, 1);
2451 intermediate_state->bs = intermediate;
2452 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2454 if (intermediate->backing_hd == base) {
2455 base_bs = intermediate->backing_hd;
2458 intermediate = intermediate->backing_hd;
2460 if (base_bs == NULL) {
2461 /* something went wrong, we did not end at the base. safely
2462 * unravel everything, and exit with error */
2466 /* success - we can delete the intermediate states, and link top->base */
2467 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2468 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2469 base_bs->drv ? base_bs->drv->format_name : "");
2473 bdrv_set_backing_hd(new_top_bs, base_bs);
2475 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2476 /* so that bdrv_close() does not recursively close the chain */
2477 bdrv_set_backing_hd(intermediate_state->bs, NULL);
2478 bdrv_unref(intermediate_state->bs);
/* The queue elements themselves are always released */
2483 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2484 g_free(intermediate_state);
2490 * Truncate file to 'offset' bytes (needed only for file protocols)
2492 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2494 BlockDriver *drv = bs->drv;
2498 if (!drv->bdrv_truncate)
2503 ret = drv->bdrv_truncate(bs, offset);
2505 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2506 bdrv_dirty_bitmap_truncate(bs);
2508 blk_dev_resize_cb(bs->blk);
2515 * Length of an allocated file in bytes. Sparse files are counted by actual
2516 * allocated space. Return < 0 if error or unknown.
2518 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2520 BlockDriver *drv = bs->drv;
2524 if (drv->bdrv_get_allocated_file_size) {
2525 return drv->bdrv_get_allocated_file_size(bs);
2528 return bdrv_get_allocated_file_size(bs->file);
2534 * Return number of sectors on success, -errno on error.
2536 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2538 BlockDriver *drv = bs->drv;
2543 if (drv->has_variable_length) {
2544 int ret = refresh_total_sectors(bs, bs->total_sectors);
2549 return bs->total_sectors;
2553 * Return length in bytes on success, -errno on error.
2554 * The length is always a multiple of BDRV_SECTOR_SIZE.
2556 int64_t bdrv_getlength(BlockDriverState *bs)
2558 int64_t ret = bdrv_nb_sectors(bs);
2560 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2561 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2564 /* return 0 as number of sectors if no device present or error */
2565 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2567 int64_t nb_sectors = bdrv_nb_sectors(bs);
2569 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2572 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2573 BlockdevOnError on_write_error)
2575 bs->on_read_error = on_read_error;
2576 bs->on_write_error = on_write_error;
2579 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2581 return is_read ? bs->on_read_error : bs->on_write_error;
2584 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2586 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2589 case BLOCKDEV_ON_ERROR_ENOSPC:
2590 return (error == ENOSPC) ?
2591 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2592 case BLOCKDEV_ON_ERROR_STOP:
2593 return BLOCK_ERROR_ACTION_STOP;
2594 case BLOCKDEV_ON_ERROR_REPORT:
2595 return BLOCK_ERROR_ACTION_REPORT;
2596 case BLOCKDEV_ON_ERROR_IGNORE:
2597 return BLOCK_ERROR_ACTION_IGNORE;
2603 static void send_qmp_error_event(BlockDriverState *bs,
2604 BlockErrorAction action,
2605 bool is_read, int error)
2607 IoOperationType optype;
2609 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2610 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2611 bdrv_iostatus_is_enabled(bs),
2612 error == ENOSPC, strerror(error),
2616 /* This is done by device models because, while the block layer knows
2617 * about the error, it does not know whether an operation comes from
2618 * the device or the block layer (from a job, for example).
2620 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2621 bool is_read, int error)
2625 if (action == BLOCK_ERROR_ACTION_STOP) {
2626 /* First set the iostatus, so that "info block" returns an iostatus
2627 * that matches the events raised so far (an additional error iostatus
2628 * is fine, but not a lost one).
2630 bdrv_iostatus_set_err(bs, error);
2632 /* Then raise the request to stop the VM and the event.
2633 * qemu_system_vmstop_request_prepare has two effects. First,
2634 * it ensures that the STOP event always comes after the
2635 * BLOCK_IO_ERROR event. Second, it ensures that even if management
2636 * can observe the STOP event and do a "cont" before the STOP
2637 * event is issued, the VM will not stop. In this case, vm_start()
2638 * also ensures that the STOP/RESUME pair of events is emitted.
2640 qemu_system_vmstop_request_prepare();
2641 send_qmp_error_event(bs, action, is_read, error);
2642 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2644 send_qmp_error_event(bs, action, is_read, error);
2648 int bdrv_is_read_only(BlockDriverState *bs)
2650 return bs->read_only;
2653 int bdrv_is_sg(BlockDriverState *bs)
2658 int bdrv_enable_write_cache(BlockDriverState *bs)
2660 return bs->enable_write_cache;
2663 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2665 bs->enable_write_cache = wce;
2667 /* so a reopen() will preserve wce */
2669 bs->open_flags |= BDRV_O_CACHE_WB;
2671 bs->open_flags &= ~BDRV_O_CACHE_WB;
2675 int bdrv_is_encrypted(BlockDriverState *bs)
2677 if (bs->backing_hd && bs->backing_hd->encrypted)
2679 return bs->encrypted;
2682 int bdrv_key_required(BlockDriverState *bs)
2684 BlockDriverState *backing_hd = bs->backing_hd;
2686 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2688 return (bs->encrypted && !bs->valid_key);
2691 int bdrv_set_key(BlockDriverState *bs, const char *key)
2694 if (bs->backing_hd && bs->backing_hd->encrypted) {
2695 ret = bdrv_set_key(bs->backing_hd, key);
2701 if (!bs->encrypted) {
2703 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2706 ret = bs->drv->bdrv_set_key(bs, key);
2709 } else if (!bs->valid_key) {
2712 /* call the change callback now, we skipped it on open */
2713 blk_dev_change_media_cb(bs->blk, true);
2720 * Provide an encryption key for @bs.
2721 * If @key is non-null:
2722 * If @bs is not encrypted, fail.
2723 * Else if the key is invalid, fail.
2724 * Else set @bs's key to @key, replacing the existing key, if any.
2726 * If @bs is encrypted and still lacks a key, fail.
2728 * On failure, store an error object through @errp if non-null.
2730 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2733 if (!bdrv_is_encrypted(bs)) {
2734 error_setg(errp, "Node '%s' is not encrypted",
2735 bdrv_get_device_or_node_name(bs));
2736 } else if (bdrv_set_key(bs, key) < 0) {
2737 error_setg(errp, QERR_INVALID_PASSWORD);
2740 if (bdrv_key_required(bs)) {
2741 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2742 "'%s' (%s) is encrypted",
2743 bdrv_get_device_or_node_name(bs),
2744 bdrv_get_encrypted_filename(bs));
2749 const char *bdrv_get_format_name(BlockDriverState *bs)
2751 return bs->drv ? bs->drv->format_name : NULL;
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes pointers to the array elements, and each element is itself
 * a string pointer, so both arguments must be dereferenced once before they
 * are handed to strcmp().  Passing them to strcmp() directly would compare
 * the bytes of the pointers instead of the strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2759 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2765 const char **formats = NULL;
2767 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2768 if (drv->format_name) {
2771 while (formats && i && !found) {
2772 found = !strcmp(formats[--i], drv->format_name);
2776 formats = g_renew(const char *, formats, count + 1);
2777 formats[count++] = drv->format_name;
2782 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2784 for (i = 0; i < count; i++) {
2785 it(opaque, formats[i]);
2791 /* This function is to find a node in the bs graph */
2792 BlockDriverState *bdrv_find_node(const char *node_name)
2794 BlockDriverState *bs;
2798 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2799 if (!strcmp(node_name, bs->node_name)) {
2806 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2807 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2809 BlockDeviceInfoList *list, *entry;
2810 BlockDriverState *bs;
2813 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2814 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2816 qapi_free_BlockDeviceInfoList(list);
2819 entry = g_malloc0(sizeof(*entry));
2820 entry->value = info;
2828 BlockDriverState *bdrv_lookup_bs(const char *device,
2829 const char *node_name,
2833 BlockDriverState *bs;
2836 blk = blk_by_name(device);
2844 bs = bdrv_find_node(node_name);
2851 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2852 device ? device : "",
2853 node_name ? node_name : "");
2857 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2858 * return false. If either argument is NULL, return false. */
2859 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2861 while (top && top != base) {
2862 top = top->backing_hd;
2868 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2871 return QTAILQ_FIRST(&graph_bdrv_states);
2873 return QTAILQ_NEXT(bs, node_list);
2876 BlockDriverState *bdrv_next(BlockDriverState *bs)
2879 return QTAILQ_FIRST(&bdrv_states);
2881 return QTAILQ_NEXT(bs, device_list);
2884 const char *bdrv_get_node_name(const BlockDriverState *bs)
2886 return bs->node_name;
2889 /* TODO check what callers really want: bs->node_name or blk_name() */
2890 const char *bdrv_get_device_name(const BlockDriverState *bs)
2892 return bs->blk ? blk_name(bs->blk) : "";
2895 /* This can be used to identify nodes that might not have a device
2896 * name associated. Since node and device names live in the same
2897 * namespace, the result is unambiguous. The exception is if both are
2898 * absent, then this returns an empty (non-null) string. */
2899 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2901 return bs->blk ? blk_name(bs->blk) : bs->node_name;
2904 int bdrv_get_flags(BlockDriverState *bs)
2906 return bs->open_flags;
2909 int bdrv_has_zero_init_1(BlockDriverState *bs)
2914 int bdrv_has_zero_init(BlockDriverState *bs)
2918 /* If BS is a copy on write image, it is initialized to
2919 the contents of the base image, which may not be zeroes. */
2920 if (bs->backing_hd) {
2923 if (bs->drv->bdrv_has_zero_init) {
2924 return bs->drv->bdrv_has_zero_init(bs);
2931 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2933 BlockDriverInfo bdi;
2935 if (bs->backing_hd) {
2939 if (bdrv_get_info(bs, &bdi) == 0) {
2940 return bdi.unallocated_blocks_are_zero;
2946 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2948 BlockDriverInfo bdi;
2950 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2954 if (bdrv_get_info(bs, &bdi) == 0) {
2955 return bdi.can_write_zeroes_with_unmap;
2961 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2963 if (bs->backing_hd && bs->backing_hd->encrypted)
2964 return bs->backing_file;
2965 else if (bs->encrypted)
2966 return bs->filename;
2971 void bdrv_get_backing_filename(BlockDriverState *bs,
2972 char *filename, int filename_size)
2974 pstrcpy(filename, filename_size, bs->backing_file);
2977 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2979 BlockDriver *drv = bs->drv;
2982 if (!drv->bdrv_get_info)
2984 memset(bdi, 0, sizeof(*bdi));
2985 return drv->bdrv_get_info(bs, bdi);
2988 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2990 BlockDriver *drv = bs->drv;
2991 if (drv && drv->bdrv_get_specific_info) {
2992 return drv->bdrv_get_specific_info(bs);
2997 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2999 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3003 bs->drv->bdrv_debug_event(bs, event);
3006 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3009 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3013 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3014 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3020 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3022 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3026 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3027 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3033 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3035 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3039 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3040 return bs->drv->bdrv_debug_resume(bs, tag);
3046 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3048 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3052 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3053 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3059 int bdrv_is_snapshot(BlockDriverState *bs)
3061 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3064 /* backing_file can either be relative, or absolute, or a protocol. If it is
3065 * relative, it must be relative to the chain. So, passing in bs->filename
3066 * from a BDS as backing_file should not be done, as that may be relative to
3067 * the CWD rather than the chain. */
3068 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3069 const char *backing_file)
3071 char *filename_full = NULL;
3072 char *backing_file_full = NULL;
3073 char *filename_tmp = NULL;
3074 int is_protocol = 0;
3075 BlockDriverState *curr_bs = NULL;
3076 BlockDriverState *retval = NULL;
3078 if (!bs || !bs->drv || !backing_file) {
3082 filename_full = g_malloc(PATH_MAX);
3083 backing_file_full = g_malloc(PATH_MAX);
3084 filename_tmp = g_malloc(PATH_MAX);
3086 is_protocol = path_has_protocol(backing_file);
3088 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
3090 /* If either of the filename paths is actually a protocol, then
3091 * compare unmodified paths; otherwise make paths relative */
3092 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3093 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3094 retval = curr_bs->backing_hd;
3098 /* If not an absolute filename path, make it relative to the current
3099 * image's filename path */
3100 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3103 /* We are going to compare absolute pathnames */
3104 if (!realpath(filename_tmp, filename_full)) {
3108 /* We need to make sure the backing filename we are comparing against
3109 * is relative to the current image filename (or absolute) */
3110 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3111 curr_bs->backing_file);
3113 if (!realpath(filename_tmp, backing_file_full)) {
3117 if (strcmp(backing_file_full, filename_full) == 0) {
3118 retval = curr_bs->backing_hd;
3124 g_free(filename_full);
3125 g_free(backing_file_full);
3126 g_free(filename_tmp);
3130 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3136 if (!bs->backing_hd) {
3140 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
3143 void bdrv_init(void)
3145 module_call_init(MODULE_INIT_BLOCK);
3148 void bdrv_init_with_whitelist(void)
3150 use_bdrv_whitelist = 1;
3154 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3156 Error *local_err = NULL;
3163 if (!(bs->open_flags & BDRV_O_INCOMING)) {
3166 bs->open_flags &= ~BDRV_O_INCOMING;
3168 if (bs->drv->bdrv_invalidate_cache) {
3169 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3170 } else if (bs->file) {
3171 bdrv_invalidate_cache(bs->file, &local_err);
3174 error_propagate(errp, local_err);
3178 ret = refresh_total_sectors(bs, bs->total_sectors);
3180 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3185 void bdrv_invalidate_cache_all(Error **errp)
3187 BlockDriverState *bs;
3188 Error *local_err = NULL;
3190 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3191 AioContext *aio_context = bdrv_get_aio_context(bs);
3193 aio_context_acquire(aio_context);
3194 bdrv_invalidate_cache(bs, &local_err);
3195 aio_context_release(aio_context);
3197 error_propagate(errp, local_err);
3203 /**************************************************************/
3204 /* removable device support */
3207 * Return TRUE if the media is present
3209 int bdrv_is_inserted(BlockDriverState *bs)
3211 BlockDriver *drv = bs->drv;
3215 if (!drv->bdrv_is_inserted)
3217 return drv->bdrv_is_inserted(bs);
3221 * Return whether the media changed since the last call to this
3222 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3224 int bdrv_media_changed(BlockDriverState *bs)
3226 BlockDriver *drv = bs->drv;
3228 if (drv && drv->bdrv_media_changed) {
3229 return drv->bdrv_media_changed(bs);
3235 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3237 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3239 BlockDriver *drv = bs->drv;
3240 const char *device_name;
3242 if (drv && drv->bdrv_eject) {
3243 drv->bdrv_eject(bs, eject_flag);
3246 device_name = bdrv_get_device_name(bs);
3247 if (device_name[0] != '\0') {
3248 qapi_event_send_device_tray_moved(device_name,
3249 eject_flag, &error_abort);
3254 * Lock or unlock the media (if it is locked, the user won't be able
3255 * to eject it manually).
3257 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3259 BlockDriver *drv = bs->drv;
3261 trace_bdrv_lock_medium(bs, locked);
3263 if (drv && drv->bdrv_lock_medium) {
3264 drv->bdrv_lock_medium(bs, locked);
3268 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3270 bs->guest_block_size = align;
3273 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3275 BdrvDirtyBitmap *bm;
3278 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3279 if (bm->name && !strcmp(name, bm->name)) {
3286 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3288 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3289 g_free(bitmap->name);
3290 bitmap->name = NULL;
3293 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3294 uint32_t granularity,
3298 int64_t bitmap_size;
3299 BdrvDirtyBitmap *bitmap;
3300 uint32_t sector_granularity;
3302 assert((granularity & (granularity - 1)) == 0);
3304 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3305 error_setg(errp, "Bitmap already exists: %s", name);
3308 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3309 assert(sector_granularity);
3310 bitmap_size = bdrv_nb_sectors(bs);
3311 if (bitmap_size < 0) {
3312 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3313 errno = -bitmap_size;
3316 bitmap = g_new0(BdrvDirtyBitmap, 1);
3317 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3318 bitmap->size = bitmap_size;
3319 bitmap->name = g_strdup(name);
3320 bitmap->disabled = false;
3321 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3325 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3327 return bitmap->successor;
3330 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3332 return !(bitmap->disabled || bitmap->successor);
3335 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3337 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3338 return DIRTY_BITMAP_STATUS_FROZEN;
3339 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3340 return DIRTY_BITMAP_STATUS_DISABLED;
3342 return DIRTY_BITMAP_STATUS_ACTIVE;
3347 * Create a successor bitmap destined to replace this bitmap after an operation.
3348 * Requires that the bitmap is not frozen and has no successor.
3350 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3351 BdrvDirtyBitmap *bitmap, Error **errp)
3353 uint64_t granularity;
3354 BdrvDirtyBitmap *child;
3356 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3357 error_setg(errp, "Cannot create a successor for a bitmap that is "
3358 "currently frozen");
3361 assert(!bitmap->successor);
3363 /* Create an anonymous successor */
3364 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3365 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3370 /* Successor will be on or off based on our current state. */
3371 child->disabled = bitmap->disabled;
3373 /* Install the successor and freeze the parent */
3374 bitmap->successor = child;
3379 * For a bitmap with a successor, yield our name to the successor,
3380 * delete the old bitmap, and return a handle to the new bitmap.
3382 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3383 BdrvDirtyBitmap *bitmap,
3387 BdrvDirtyBitmap *successor = bitmap->successor;
3389 if (successor == NULL) {
3390 error_setg(errp, "Cannot relinquish control if "
3391 "there's no successor present");
3395 name = bitmap->name;
3396 bitmap->name = NULL;
3397 successor->name = name;
3398 bitmap->successor = NULL;
3399 bdrv_release_dirty_bitmap(bs, bitmap);
3405 * In cases of failure where we can no longer safely delete the parent,
3406 * we may wish to re-join the parent and child/successor.
3407 * The merged parent will be un-frozen, but not explicitly re-enabled.
3409 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3410 BdrvDirtyBitmap *parent,
3413 BdrvDirtyBitmap *successor = parent->successor;
3416 error_setg(errp, "Cannot reclaim a successor when none is present");
3420 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3421 error_setg(errp, "Merging of parent and successor bitmap failed");
3424 bdrv_release_dirty_bitmap(bs, successor);
3425 parent->successor = NULL;
3431 * Truncates _all_ bitmaps attached to a BDS.
3433 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3435 BdrvDirtyBitmap *bitmap;
3436 uint64_t size = bdrv_nb_sectors(bs);
3438 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3439 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3440 hbitmap_truncate(bitmap->bitmap, size);
3441 bitmap->size = size;
3445 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3447 BdrvDirtyBitmap *bm, *next;
3448 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3450 assert(!bdrv_dirty_bitmap_frozen(bm));
3451 QLIST_REMOVE(bitmap, list);
3452 hbitmap_free(bitmap->bitmap);
3453 g_free(bitmap->name);
3460 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3462 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3463 bitmap->disabled = true;
3466 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3468 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3469 bitmap->disabled = false;
3472 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3474 BdrvDirtyBitmap *bm;
3475 BlockDirtyInfoList *list = NULL;
3476 BlockDirtyInfoList **plist = &list;
3478 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3479 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3480 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3481 info->count = bdrv_get_dirty_count(bm);
3482 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3483 info->has_name = !!bm->name;
3484 info->name = g_strdup(bm->name);
3485 info->status = bdrv_dirty_bitmap_status(bm);
3486 entry->value = info;
3488 plist = &entry->next;
3494 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3497 return hbitmap_get(bitmap->bitmap, sector);
3504 * Chooses a default granularity based on the existing cluster size,
3505 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3506 * is no cluster size information available.
3508 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3510 BlockDriverInfo bdi;
3511 uint32_t granularity;
3513 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3514 granularity = MAX(4096, bdi.cluster_size);
3515 granularity = MIN(65536, granularity);
3517 granularity = 65536;
3523 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3525 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3528 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3530 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3533 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3534 int64_t cur_sector, int nr_sectors)
3536 assert(bdrv_dirty_bitmap_enabled(bitmap));
3537 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3540 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3541 int64_t cur_sector, int nr_sectors)
3543 assert(bdrv_dirty_bitmap_enabled(bitmap));
3544 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3547 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3549 assert(bdrv_dirty_bitmap_enabled(bitmap));
3550 hbitmap_reset_all(bitmap->bitmap);
3553 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3556 BdrvDirtyBitmap *bitmap;
3557 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3558 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3561 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3566 * Advance an HBitmapIter to an arbitrary offset.
3568 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3571 hbitmap_iter_init(hbi, hbi->hb, offset);
3574 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3576 return hbitmap_count(bitmap->bitmap);
3579 /* Get a reference to bs */
3580 void bdrv_ref(BlockDriverState *bs)
3585 /* Release a previously grabbed reference to bs.
3586 * If after releasing, reference count is zero, the BlockDriverState is
3588 void bdrv_unref(BlockDriverState *bs)
3593 assert(bs->refcnt > 0);
3594 if (--bs->refcnt == 0) {
3599 struct BdrvOpBlocker {
3601 QLIST_ENTRY(BdrvOpBlocker) list;
3604 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3606 BdrvOpBlocker *blocker;
3607 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3608 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3609 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3611 error_setg(errp, "Node '%s' is busy: %s",
3612 bdrv_get_device_or_node_name(bs),
3613 error_get_pretty(blocker->reason));
3620 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3622 BdrvOpBlocker *blocker;
3623 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3625 blocker = g_new0(BdrvOpBlocker, 1);
3626 blocker->reason = reason;
3627 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3630 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3632 BdrvOpBlocker *blocker, *next;
3633 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3634 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3635 if (blocker->reason == reason) {
3636 QLIST_REMOVE(blocker, list);
3642 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3645 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3646 bdrv_op_block(bs, i, reason);
3650 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3653 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3654 bdrv_op_unblock(bs, i, reason);
3658 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3662 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3663 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3670 void bdrv_iostatus_enable(BlockDriverState *bs)
3672 bs->iostatus_enabled = true;
3673 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3676 /* The I/O status is only enabled if the drive explicitly
3677 * enables it _and_ the VM is configured to stop on errors */
3678 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3680 return (bs->iostatus_enabled &&
3681 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3682 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
3683 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3686 void bdrv_iostatus_disable(BlockDriverState *bs)
3688 bs->iostatus_enabled = false;
3691 void bdrv_iostatus_reset(BlockDriverState *bs)
3693 if (bdrv_iostatus_is_enabled(bs)) {
3694 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3696 block_job_iostatus_reset(bs->job);
3701 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3703 assert(bdrv_iostatus_is_enabled(bs));
3704 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3705 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3706 BLOCK_DEVICE_IO_STATUS_FAILED;
3710 void bdrv_img_create(const char *filename, const char *fmt,
3711 const char *base_filename, const char *base_fmt,
3712 char *options, uint64_t img_size, int flags,
3713 Error **errp, bool quiet)
3715 QemuOptsList *create_opts = NULL;
3716 QemuOpts *opts = NULL;
3717 const char *backing_fmt, *backing_file;
3719 BlockDriver *drv, *proto_drv;
3720 BlockDriver *backing_drv = NULL;
3721 Error *local_err = NULL;
3724 /* Find driver and parse its options */
3725 drv = bdrv_find_format(fmt);
3727 error_setg(errp, "Unknown file format '%s'", fmt);
3731 proto_drv = bdrv_find_protocol(filename, true, errp);
3736 if (!drv->create_opts) {
3737 error_setg(errp, "Format driver '%s' does not support image creation",
3742 if (!proto_drv->create_opts) {
3743 error_setg(errp, "Protocol driver '%s' does not support image creation",
3744 proto_drv->format_name);
3748 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3749 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3751 /* Create parameter list with default values */
3752 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3753 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3755 /* Parse -o options */
3757 qemu_opts_do_parse(opts, options, NULL, &local_err);
3759 error_report_err(local_err);
3761 error_setg(errp, "Invalid options for file format '%s'", fmt);
3766 if (base_filename) {
3767 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3769 error_setg(errp, "Backing file not supported for file format '%s'",
3776 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3778 error_setg(errp, "Backing file format not supported for file "
3779 "format '%s'", fmt);
3784 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3786 if (!strcmp(filename, backing_file)) {
3787 error_setg(errp, "Error: Trying to create an image with the "
3788 "same filename as the backing file");
3793 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3795 backing_drv = bdrv_find_format(backing_fmt);
3797 error_setg(errp, "Unknown backing file format '%s'",
3803 // The size for the image must always be specified, with one exception:
3804 // If we are using a backing file, we can obtain the size from there
3805 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3808 BlockDriverState *bs;
3809 char *full_backing = g_new0(char, PATH_MAX);
3813 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3814 full_backing, PATH_MAX,
3817 g_free(full_backing);
3821 /* backing files always opened read-only */
3823 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3826 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3827 backing_drv, &local_err);
3828 g_free(full_backing);
3832 size = bdrv_getlength(bs);
3834 error_setg_errno(errp, -size, "Could not get size of '%s'",
3840 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3844 error_setg(errp, "Image creation needs a size parameter");
3850 printf("Formatting '%s', fmt=%s", filename, fmt);
3851 qemu_opts_print(opts, " ");
3855 ret = bdrv_create(drv, filename, opts, &local_err);
3857 if (ret == -EFBIG) {
3858 /* This is generally a better message than whatever the driver would
3859 * deliver (especially because of the cluster_size_hint), since that
3860 * is most probably not much different from "image too large". */
3861 const char *cluster_size_hint = "";
3862 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3863 cluster_size_hint = " (try using a larger cluster size)";
3865 error_setg(errp, "The image size is too large for file format '%s'"
3866 "%s", fmt, cluster_size_hint);
3867 error_free(local_err);
3872 qemu_opts_del(opts);
3873 qemu_opts_free(create_opts);
3875 error_propagate(errp, local_err);
3879 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3881 return bs->aio_context;
3884 void bdrv_detach_aio_context(BlockDriverState *bs)
3886 BdrvAioNotifier *baf;
3892 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3893 baf->detach_aio_context(baf->opaque);
3896 if (bs->io_limits_enabled) {
3897 throttle_timers_detach_aio_context(&bs->throttle_timers);
3899 if (bs->drv->bdrv_detach_aio_context) {
3900 bs->drv->bdrv_detach_aio_context(bs);
3903 bdrv_detach_aio_context(bs->file);
3905 if (bs->backing_hd) {
3906 bdrv_detach_aio_context(bs->backing_hd);
3909 bs->aio_context = NULL;
3912 void bdrv_attach_aio_context(BlockDriverState *bs,
3913 AioContext *new_context)
3915 BdrvAioNotifier *ban;
3921 bs->aio_context = new_context;
3923 if (bs->backing_hd) {
3924 bdrv_attach_aio_context(bs->backing_hd, new_context);
3927 bdrv_attach_aio_context(bs->file, new_context);
3929 if (bs->drv->bdrv_attach_aio_context) {
3930 bs->drv->bdrv_attach_aio_context(bs, new_context);
3932 if (bs->io_limits_enabled) {
3933 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3936 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3937 ban->attached_aio_context(new_context, ban->opaque);
3941 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3943 bdrv_drain(bs); /* ensure there are no in-flight requests */
3945 bdrv_detach_aio_context(bs);
3947 /* This function executes in the old AioContext so acquire the new one in
3948 * case it runs in a different thread.
3950 aio_context_acquire(new_context);
3951 bdrv_attach_aio_context(bs, new_context);
3952 aio_context_release(new_context);
3955 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3956 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3957 void (*detach_aio_context)(void *opaque), void *opaque)
3959 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3960 *ban = (BdrvAioNotifier){
3961 .attached_aio_context = attached_aio_context,
3962 .detach_aio_context = detach_aio_context,
3966 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3969 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3970 void (*attached_aio_context)(AioContext *,
3972 void (*detach_aio_context)(void *),
3975 BdrvAioNotifier *ban, *ban_next;
3977 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3978 if (ban->attached_aio_context == attached_aio_context &&
3979 ban->detach_aio_context == detach_aio_context &&
3980 ban->opaque == opaque)
3982 QLIST_REMOVE(ban, list);
3992 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3993 BlockDriverAmendStatusCB *status_cb)
3995 if (!bs->drv->bdrv_amend_options) {
3998 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
4001 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4002 * of block filter and by bdrv_is_first_non_filter.
4003 * It is used to test if the given bs is the candidate or recurse more in the
4006 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4007 BlockDriverState *candidate)
4009 /* return false if basic checks fails */
4010 if (!bs || !bs->drv) {
4014 /* the code reached a non block filter driver -> check if the bs is
4015 * the same as the candidate. It's the recursion termination condition.
4017 if (!bs->drv->is_filter) {
4018 return bs == candidate;
4020 /* Down this path the driver is a block filter driver */
4022 /* If the block filter recursion method is defined use it to recurse down
4025 if (bs->drv->bdrv_recurse_is_first_non_filter) {
4026 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4029 /* the driver is a block filter but don't allow to recurse -> return false
4034 /* This function checks if the candidate is the first non filter bs down it's
4035 * bs chain. Since we don't have pointers to parents it explore all bs chains
4036 * from the top. Some filters can choose not to pass down the recursion.
4038 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4040 BlockDriverState *bs;
4042 /* walk down the bs forest recursively */
4043 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4046 /* try to recurse in this top level bs */
4047 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4049 /* candidate is the first non filter */
4058 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
4060 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4061 AioContext *aio_context;
4063 if (!to_replace_bs) {
4064 error_setg(errp, "Node name '%s' not found", node_name);
4068 aio_context = bdrv_get_aio_context(to_replace_bs);
4069 aio_context_acquire(aio_context);
4071 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4072 to_replace_bs = NULL;
4076 /* We don't want arbitrary node of the BDS chain to be replaced only the top
4077 * most non filter in order to prevent data corruption.
4078 * Another benefit is that this tests exclude backing files which are
4079 * blocked by the backing blockers.
4081 if (!bdrv_is_first_non_filter(to_replace_bs)) {
4082 error_setg(errp, "Only top most non filter can be replaced");
4083 to_replace_bs = NULL;
4088 aio_context_release(aio_context);
4089 return to_replace_bs;
4092 static bool append_open_options(QDict *d, BlockDriverState *bs)
4094 const QDictEntry *entry;
4095 bool found_any = false;
4097 for (entry = qdict_first(bs->options); entry;
4098 entry = qdict_next(bs->options, entry))
4100 /* Only take options for this level and exclude all non-driver-specific
4102 if (!strchr(qdict_entry_key(entry), '.') &&
4103 strcmp(qdict_entry_key(entry), "node-name"))
4105 qobject_incref(qdict_entry_value(entry));
4106 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4114 /* Updates the following BDS fields:
4115 * - exact_filename: A filename which may be used for opening a block device
4116 * which (mostly) equals the given BDS (even without any
4117 * other options; so reading and writing must return the same
4118 * results, but caching etc. may be different)
4119 * - full_open_options: Options which, when given when opening a block device
4120 * (without a filename), result in a BDS (mostly)
4121 * equalling the given one
4122 * - filename: If exact_filename is set, it is copied here. Otherwise,
4123 * full_open_options is converted to a JSON object, prefixed with
4124 * "json:" (for use through the JSON pseudo protocol) and put here.
4126 void bdrv_refresh_filename(BlockDriverState *bs)
4128 BlockDriver *drv = bs->drv;
4135 /* This BDS's file name will most probably depend on its file's name, so
4136 * refresh that first */
4138 bdrv_refresh_filename(bs->file);
4141 if (drv->bdrv_refresh_filename) {
4142 /* Obsolete information is of no use here, so drop the old file name
4143 * information before refreshing it */
4144 bs->exact_filename[0] = '\0';
4145 if (bs->full_open_options) {
4146 QDECREF(bs->full_open_options);
4147 bs->full_open_options = NULL;
4150 drv->bdrv_refresh_filename(bs);
4151 } else if (bs->file) {
4152 /* Try to reconstruct valid information from the underlying file */
4153 bool has_open_options;
4155 bs->exact_filename[0] = '\0';
4156 if (bs->full_open_options) {
4157 QDECREF(bs->full_open_options);
4158 bs->full_open_options = NULL;
4162 has_open_options = append_open_options(opts, bs);
4164 /* If no specific options have been given for this BDS, the filename of
4165 * the underlying file should suffice for this one as well */
4166 if (bs->file->exact_filename[0] && !has_open_options) {
4167 strcpy(bs->exact_filename, bs->file->exact_filename);
4169 /* Reconstructing the full options QDict is simple for most format block
4170 * drivers, as long as the full options are known for the underlying
4171 * file BDS. The full options QDict of that file BDS should somehow
4172 * contain a representation of the filename, therefore the following
4173 * suffices without querying the (exact_)filename of this BDS. */
4174 if (bs->file->full_open_options) {
4175 qdict_put_obj(opts, "driver",
4176 QOBJECT(qstring_from_str(drv->format_name)));
4177 QINCREF(bs->file->full_open_options);
4178 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
4180 bs->full_open_options = opts;
4184 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4185 /* There is no underlying file BDS (at least referenced by BDS.file),
4186 * so the full options QDict should be equal to the options given
4187 * specifically for this block device when it was opened (plus the
4188 * driver specification).
4189 * Because those options don't change, there is no need to update
4190 * full_open_options when it's already set. */
4193 append_open_options(opts, bs);
4194 qdict_put_obj(opts, "driver",
4195 QOBJECT(qstring_from_str(drv->format_name)));
4197 if (bs->exact_filename[0]) {
4198 /* This may not work for all block protocol drivers (some may
4199 * require this filename to be parsed), but we have to find some
4200 * default solution here, so just include it. If some block driver
4201 * does not support pure options without any filename at all or
4202 * needs some special format of the options QDict, it needs to
4203 * implement the driver-specific bdrv_refresh_filename() function.
4205 qdict_put_obj(opts, "filename",
4206 QOBJECT(qstring_from_str(bs->exact_filename)));
4209 bs->full_open_options = opts;
4212 if (bs->exact_filename[0]) {
4213 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4214 } else if (bs->full_open_options) {
4215 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4216 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4217 qstring_get_str(json));
4222 /* This accessor function purpose is to allow the device models to access the
4223 * BlockAcctStats structure embedded inside a BlockDriverState without being
4224 * aware of the BlockDriverState structure layout.
4225 * It will go away when the BlockAcctStats structure will be moved inside
4226 * the device models.
4228 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)