/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24 #include "config-host.h"
25 #include "qemu-common.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "block/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
44 #include <sys/types.h>
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
58 * A BdrvDirtyBitmap can be in three possible states:
59 * (1) successor is NULL and disabled is false: full r/w mode
60 * (2) successor is NULL and disabled is true: read only mode ("disabled")
61 * (3) successor is set: frozen mode.
62 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
63 * or enabled. A frozen bitmap can only abdicate() or reclaim().
65 struct BdrvDirtyBitmap {
66 HBitmap *bitmap; /* Dirty sector bitmap implementation */
67 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
68 char *name; /* Optional non-empty unique ID */
69 int64_t size; /* Size of the bitmap (Number of sectors) */
70 bool disabled; /* Bitmap is read-only */
71 QLIST_ENTRY(BdrvDirtyBitmap) list;
74 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
79 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
80 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
82 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
83 QLIST_HEAD_INITIALIZER(bdrv_drivers);
85 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
86 const char *reference, QDict *options, int flags,
87 BlockDriverState *parent,
88 const BdrvChildRole *child_role, Error **errp);
90 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
91 /* If non-zero, use only whitelisted block drivers */
92 static int use_bdrv_whitelist;
#ifdef _WIN32
/* Return non-zero if @filename starts with a drive letter followed by ':' */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

/*
 * Return 1 if @filename is a bare drive ("d:") or a Win32 device path
 * ("\\.\..." or "//./..."), otherwise 0.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
114 size_t bdrv_opt_mem_align(BlockDriverState *bs)
116 if (!bs || !bs->drv) {
117 /* page size or 4k (hdd sector size) should be on the safe side */
118 return MAX(4096, getpagesize());
121 return bs->bl.opt_mem_alignment;
124 size_t bdrv_min_mem_align(BlockDriverState *bs)
126 if (!bs || !bs->drv) {
127 /* page size or 4k (hdd sector size) should be on the safe side */
128 return MAX(4096, getpagesize());
131 return bs->bl.min_mem_alignment;
/*
 * check if the path starts with "<protocol>:"
 *
 * The first ':' only counts as a protocol separator when it appears before
 * any directory separator. On Windows, drive names are never protocols.
 */
int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}
/*
 * Return non-zero if @path is absolute. On Windows, drive and device names
 * as well as a leading backslash also count as absolute.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* Skip past a "<protocol>:" prefix in the base, if any */
        p = strchr(base_path, ':');
        if (p) {
            p++;
        } else {
            p = base_path;
        }
        /* Find the end of the directory part of the base */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1) {
                p1 = p2;
            }
        }
#endif
        if (p1) {
            p1++;
        } else {
            p1 = base_path;
        }
        if (p1 > p) {
            p = p1;
        }
        /* Copy the base prefix, truncated so the terminator always fits */
        len = p - base_path;
        if (len > dest_size - 1) {
            len = dest_size - 1;
        }
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
209 void bdrv_get_full_backing_filename_from_filename(const char *backed,
211 char *dest, size_t sz,
214 if (backing[0] == '\0' || path_has_protocol(backing) ||
215 path_is_absolute(backing))
217 pstrcpy(dest, sz, backing);
218 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
219 error_setg(errp, "Cannot use relative backing file names for '%s'",
222 path_combine(dest, sz, backed, backing);
226 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
229 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
231 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
235 void bdrv_register(BlockDriver *bdrv)
237 bdrv_setup_io_funcs(bdrv);
239 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
242 BlockDriverState *bdrv_new_root(void)
244 BlockDriverState *bs = bdrv_new();
246 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
250 BlockDriverState *bdrv_new(void)
252 BlockDriverState *bs;
255 bs = g_new0(BlockDriverState, 1);
256 QLIST_INIT(&bs->dirty_bitmaps);
257 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
258 QLIST_INIT(&bs->op_blockers[i]);
260 bdrv_iostatus_disable(bs);
261 notifier_list_init(&bs->close_notifiers);
262 notifier_with_return_list_init(&bs->before_write_notifiers);
263 qemu_co_queue_init(&bs->throttled_reqs[0]);
264 qemu_co_queue_init(&bs->throttled_reqs[1]);
266 bs->aio_context = qemu_get_aio_context();
271 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
273 notifier_list_add(&bs->close_notifiers, notify);
276 BlockDriver *bdrv_find_format(const char *format_name)
279 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
280 if (!strcmp(drv1->format_name, format_name)) {
287 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
289 static const char *whitelist_rw[] = {
290 CONFIG_BDRV_RW_WHITELIST
292 static const char *whitelist_ro[] = {
293 CONFIG_BDRV_RO_WHITELIST
297 if (!whitelist_rw[0] && !whitelist_ro[0]) {
298 return 1; /* no whitelist, anything goes */
301 for (p = whitelist_rw; *p; p++) {
302 if (!strcmp(drv->format_name, *p)) {
307 for (p = whitelist_ro; *p; p++) {
308 if (!strcmp(drv->format_name, *p)) {
316 typedef struct CreateCo {
324 static void coroutine_fn bdrv_create_co_entry(void *opaque)
326 Error *local_err = NULL;
329 CreateCo *cco = opaque;
332 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
334 error_propagate(&cco->err, local_err);
339 int bdrv_create(BlockDriver *drv, const char* filename,
340 QemuOpts *opts, Error **errp)
347 .filename = g_strdup(filename),
353 if (!drv->bdrv_create) {
354 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
359 if (qemu_in_coroutine()) {
360 /* Fast-path if already in coroutine context */
361 bdrv_create_co_entry(&cco);
363 co = qemu_coroutine_create(bdrv_create_co_entry);
364 qemu_coroutine_enter(co, &cco);
365 while (cco.ret == NOT_DONE) {
366 aio_poll(qemu_get_aio_context(), true);
373 error_propagate(errp, cco.err);
375 error_setg_errno(errp, -ret, "Could not create image");
380 g_free(cco.filename);
384 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
387 Error *local_err = NULL;
390 drv = bdrv_find_protocol(filename, true, errp);
395 ret = bdrv_create(drv, filename, opts, &local_err);
397 error_propagate(errp, local_err);
403 * Try to get @bs's logical and physical block size.
404 * On success, store them in @bsz struct and return 0.
405 * On failure return -errno.
406 * @bs must not be empty.
408 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
410 BlockDriver *drv = bs->drv;
412 if (drv && drv->bdrv_probe_blocksizes) {
413 return drv->bdrv_probe_blocksizes(bs, bsz);
420 * Try to get @bs's geometry (cyls, heads, sectors).
421 * On success, store them in @geo struct and return 0.
422 * On failure return -errno.
423 * @bs must not be empty.
425 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
427 BlockDriver *drv = bs->drv;
429 if (drv && drv->bdrv_probe_geometry) {
430 return drv->bdrv_probe_geometry(bs, geo);
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 *
 * @filename receives the name and must hold @size bytes (at least MAX_PATH
 * on Windows). On POSIX hosts the file is created under $TMPDIR.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        /* NOTE(review): fallback directory restored from a dropped line */
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Save errno first: unlink() may overwrite the close() failure */
        int saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
473 * Detect host devices. By convention, /dev/cdrom[N] is always
474 * recognized as a host CDROM.
476 static BlockDriver *find_hdev_driver(const char *filename)
478 int score_max = 0, score;
479 BlockDriver *drv = NULL, *d;
481 QLIST_FOREACH(d, &bdrv_drivers, list) {
482 if (d->bdrv_probe_device) {
483 score = d->bdrv_probe_device(filename);
484 if (score > score_max) {
494 BlockDriver *bdrv_find_protocol(const char *filename,
495 bool allow_protocol_prefix,
503 /* TODO Drivers without bdrv_file_open must be specified explicitly */
506 * XXX(hch): we really should not let host device detection
507 * override an explicit protocol specification, but moving this
508 * later breaks access to device names with colons in them.
509 * Thanks to the brain-dead persistent naming schemes on udev-
510 * based Linux systems those actually are quite common.
512 drv1 = find_hdev_driver(filename);
517 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
521 p = strchr(filename, ':');
524 if (len > sizeof(protocol) - 1)
525 len = sizeof(protocol) - 1;
526 memcpy(protocol, filename, len);
527 protocol[len] = '\0';
528 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
529 if (drv1->protocol_name &&
530 !strcmp(drv1->protocol_name, protocol)) {
535 error_setg(errp, "Unknown protocol '%s'", protocol);
540 * Guess image format by probing its contents.
541 * This is not a good idea when your image is raw (CVE-2008-2004), but
542 * we do it anyway for backward compatibility.
544 * @buf contains the image's first @buf_size bytes.
545 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
546 * but can be smaller if the image file is smaller)
547 * @filename is its filename.
549 * For all block drivers, call the bdrv_probe() method to get its
551 * Return the first block driver with the highest probing score.
553 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
554 const char *filename)
556 int score_max = 0, score;
557 BlockDriver *drv = NULL, *d;
559 QLIST_FOREACH(d, &bdrv_drivers, list) {
561 score = d->bdrv_probe(buf, buf_size, filename);
562 if (score > score_max) {
572 static int find_image_format(BlockDriverState *bs, const char *filename,
573 BlockDriver **pdrv, Error **errp)
576 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
579 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
580 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
585 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
587 error_setg_errno(errp, -ret, "Could not read image for determining its "
593 drv = bdrv_probe_all(buf, ret, filename);
595 error_setg(errp, "Could not determine image format: No compatible "
604 * Set the current 'total_sectors' value
605 * Return 0 on success, -errno on error.
607 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
609 BlockDriver *drv = bs->drv;
611 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
615 /* query actual device if possible, otherwise just trust the hint */
616 if (drv->bdrv_getlength) {
617 int64_t length = drv->bdrv_getlength(bs);
621 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
624 bs->total_sectors = hint;
629 * Set open flags for a given discard mode
631 * Return 0 on success, -1 if the discard mode was invalid.
633 int bdrv_parse_discard_flags(const char *mode, int *flags)
635 *flags &= ~BDRV_O_UNMAP;
637 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
639 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
640 *flags |= BDRV_O_UNMAP;
649 * Set open flags for a given cache mode
651 * Return 0 on success, -1 if the cache mode was invalid.
653 int bdrv_parse_cache_flags(const char *mode, int *flags)
655 *flags &= ~BDRV_O_CACHE_MASK;
657 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
658 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
659 } else if (!strcmp(mode, "directsync")) {
660 *flags |= BDRV_O_NOCACHE;
661 } else if (!strcmp(mode, "writeback")) {
662 *flags |= BDRV_O_CACHE_WB;
663 } else if (!strcmp(mode, "unsafe")) {
664 *flags |= BDRV_O_CACHE_WB;
665 *flags |= BDRV_O_NO_FLUSH;
666 } else if (!strcmp(mode, "writethrough")) {
667 /* this is the default */
676 * Returns the flags that a temporary snapshot should get, based on the
677 * originally requested flags (the originally requested image will have flags
678 * like a backing file)
680 static int bdrv_temp_snapshot_flags(int flags)
682 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
686 * Returns the flags that bs->file should get if a protocol driver is expected,
687 * based on the given flags for the parent BDS
689 static int bdrv_inherited_flags(int flags)
691 /* Enable protocol handling, disable format probing for bs->file */
692 flags |= BDRV_O_PROTOCOL;
694 /* Our block drivers take care to send flushes and respect unmap policy,
695 * so we can enable both unconditionally on lower layers. */
696 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
698 /* Clear flags that only apply to the top layer */
699 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
704 const BdrvChildRole child_file = {
705 .inherit_flags = bdrv_inherited_flags,
709 * Returns the flags that bs->file should get if the use of formats (and not
710 * only protocols) is permitted for it, based on the given flags for the parent
713 static int bdrv_inherited_fmt_flags(int parent_flags)
715 int flags = child_file.inherit_flags(parent_flags);
716 return flags & ~BDRV_O_PROTOCOL;
719 const BdrvChildRole child_format = {
720 .inherit_flags = bdrv_inherited_fmt_flags,
724 * Returns the flags that bs->backing_hd should get, based on the given flags
727 static int bdrv_backing_flags(int flags)
729 /* backing files always opened read-only */
730 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
732 /* snapshot=on is handled on the top layer */
733 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
738 static const BdrvChildRole child_backing = {
739 .inherit_flags = bdrv_backing_flags,
742 static int bdrv_open_flags(BlockDriverState *bs, int flags)
744 int open_flags = flags | BDRV_O_CACHE_WB;
747 * Clear flags that are internal to the block layer before opening the
750 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
753 * Snapshots should be writable.
755 if (flags & BDRV_O_TEMPORARY) {
756 open_flags |= BDRV_O_RDWR;
762 static void bdrv_assign_node_name(BlockDriverState *bs,
763 const char *node_name,
770 /* Check for empty string or invalid characters */
771 if (!id_wellformed(node_name)) {
772 error_setg(errp, "Invalid node name");
776 /* takes care of avoiding namespaces collisions */
777 if (blk_by_name(node_name)) {
778 error_setg(errp, "node-name=%s is conflicting with a device id",
783 /* takes care of avoiding duplicates node names */
784 if (bdrv_find_node(node_name)) {
785 error_setg(errp, "Duplicate node name");
789 /* copy node name into the bs and insert it into the graph list */
790 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
791 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
794 static QemuOptsList bdrv_runtime_opts = {
795 .name = "bdrv_common",
796 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
800 .type = QEMU_OPT_STRING,
801 .help = "Node name of the block device node",
803 { /* end of list */ }
808 * Common part for opening disk images and files
810 * Removes all processed options from *options.
812 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
813 QDict *options, int flags, BlockDriver *drv, Error **errp)
816 const char *filename;
817 const char *node_name = NULL;
819 Error *local_err = NULL;
822 assert(bs->file == NULL);
823 assert(options != NULL && bs->options != options);
826 filename = file->bs->filename;
828 filename = qdict_get_try_str(options, "filename");
831 if (drv->bdrv_needs_filename && !filename) {
832 error_setg(errp, "The '%s' block driver requires a file name",
837 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
839 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
840 qemu_opts_absorb_qdict(opts, options, &local_err);
842 error_propagate(errp, local_err);
847 node_name = qemu_opt_get(opts, "node-name");
848 bdrv_assign_node_name(bs, node_name, &local_err);
850 error_propagate(errp, local_err);
855 bs->guest_block_size = 512;
856 bs->request_alignment = 512;
857 bs->zero_beyond_eof = true;
858 open_flags = bdrv_open_flags(bs, flags);
859 bs->read_only = !(open_flags & BDRV_O_RDWR);
861 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
863 !bs->read_only && bdrv_is_whitelisted(drv, true)
864 ? "Driver '%s' can only be used for read-only devices"
865 : "Driver '%s' is not whitelisted",
871 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
872 if (flags & BDRV_O_COPY_ON_READ) {
873 if (!bs->read_only) {
874 bdrv_enable_copy_on_read(bs);
876 error_setg(errp, "Can't use copy-on-read on read-only device");
882 if (filename != NULL) {
883 pstrcpy(bs->filename, sizeof(bs->filename), filename);
885 bs->filename[0] = '\0';
887 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
890 bs->opaque = g_malloc0(drv->instance_size);
892 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
894 /* Open the image, either directly or using a protocol */
895 if (drv->bdrv_file_open) {
896 assert(file == NULL);
897 assert(!drv->bdrv_needs_filename || filename != NULL);
898 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
901 error_setg(errp, "Can't use '%s' as a block driver for the "
902 "protocol level", drv->format_name);
907 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
912 error_propagate(errp, local_err);
913 } else if (bs->filename[0]) {
914 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
916 error_setg_errno(errp, -ret, "Could not open image");
922 error_report("Encrypted images are deprecated");
923 error_printf("Support for them will be removed in a future release.\n"
924 "You can use 'qemu-img convert' to convert your image"
925 " to an unencrypted one.\n");
928 ret = refresh_total_sectors(bs, bs->total_sectors);
930 error_setg_errno(errp, -ret, "Could not refresh total sector count");
934 bdrv_refresh_limits(bs, &local_err);
936 error_propagate(errp, local_err);
941 assert(bdrv_opt_mem_align(bs) != 0);
942 assert(bdrv_min_mem_align(bs) != 0);
943 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
958 static QDict *parse_json_filename(const char *filename, Error **errp)
960 QObject *options_obj;
964 ret = strstart(filename, "json:", &filename);
967 options_obj = qobject_from_json(filename);
969 error_setg(errp, "Could not parse the JSON options");
973 if (qobject_type(options_obj) != QTYPE_QDICT) {
974 qobject_decref(options_obj);
975 error_setg(errp, "Invalid JSON object given");
979 options = qobject_to_qdict(options_obj);
980 qdict_flatten(options);
986 * Fills in default options for opening images and converts the legacy
987 * filename/flags pair to option QDict entries.
988 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
989 * block driver has been specified explicitly.
991 static int bdrv_fill_options(QDict **options, const char **pfilename,
992 int *flags, Error **errp)
994 const char *filename = *pfilename;
996 bool protocol = *flags & BDRV_O_PROTOCOL;
997 bool parse_filename = false;
998 BlockDriver *drv = NULL;
999 Error *local_err = NULL;
1001 /* Parse json: pseudo-protocol */
1002 if (filename && g_str_has_prefix(filename, "json:")) {
1003 QDict *json_options = parse_json_filename(filename, &local_err);
1005 error_propagate(errp, local_err);
1009 /* Options given in the filename have lower priority than options
1010 * specified directly */
1011 qdict_join(*options, json_options, false);
1012 QDECREF(json_options);
1013 *pfilename = filename = NULL;
1016 drvname = qdict_get_try_str(*options, "driver");
1018 drv = bdrv_find_format(drvname);
1020 error_setg(errp, "Unknown driver '%s'", drvname);
1023 /* If the user has explicitly specified the driver, this choice should
1024 * override the BDRV_O_PROTOCOL flag */
1025 protocol = drv->bdrv_file_open;
1029 *flags |= BDRV_O_PROTOCOL;
1031 *flags &= ~BDRV_O_PROTOCOL;
1034 /* Fetch the file name from the options QDict if necessary */
1035 if (protocol && filename) {
1036 if (!qdict_haskey(*options, "filename")) {
1037 qdict_put(*options, "filename", qstring_from_str(filename));
1038 parse_filename = true;
1040 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1046 /* Find the right block driver */
1047 filename = qdict_get_try_str(*options, "filename");
1049 if (!drvname && protocol) {
1051 drv = bdrv_find_protocol(filename, parse_filename, errp);
1056 drvname = drv->format_name;
1057 qdict_put(*options, "driver", qstring_from_str(drvname));
1059 error_setg(errp, "Must specify either driver or file");
1064 assert(drv || !protocol);
1066 /* Driver-specific filename parsing */
1067 if (drv && drv->bdrv_parse_filename && parse_filename) {
1068 drv->bdrv_parse_filename(filename, *options, &local_err);
1070 error_propagate(errp, local_err);
1074 if (!drv->bdrv_needs_filename) {
1075 qdict_del(*options, "filename");
1082 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1083 BlockDriverState *child_bs,
1084 const BdrvChildRole *child_role)
1086 BdrvChild *child = g_new(BdrvChild, 1);
1087 *child = (BdrvChild) {
1092 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1097 static void bdrv_detach_child(BdrvChild *child)
1099 QLIST_REMOVE(child, next);
1103 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1105 BlockDriverState *child_bs = child->bs;
1107 if (child->bs->inherits_from == parent) {
1108 child->bs->inherits_from = NULL;
1111 bdrv_detach_child(child);
1112 bdrv_unref(child_bs);
1115 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1118 if (bs->backing_hd) {
1119 assert(bs->backing_blocker);
1120 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1121 bdrv_detach_child(bs->backing_child);
1122 } else if (backing_hd) {
1123 error_setg(&bs->backing_blocker,
1124 "node is used as backing hd of '%s'",
1125 bdrv_get_device_or_node_name(bs));
1128 bs->backing_hd = backing_hd;
1130 error_free(bs->backing_blocker);
1131 bs->backing_blocker = NULL;
1132 bs->backing_child = NULL;
1135 bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing);
1136 bs->open_flags &= ~BDRV_O_NO_BACKING;
1137 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1138 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1139 backing_hd->drv ? backing_hd->drv->format_name : "");
1141 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1142 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1143 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1144 bs->backing_blocker);
1146 bdrv_refresh_limits(bs, NULL);
1150 * Opens the backing file for a BlockDriverState if not yet open
1152 * options is a QDict of options to pass to the block drivers, or NULL for an
1153 * empty set of options. The reference to the QDict is transferred to this
1154 * function (even on failure), so if the caller intends to reuse the dictionary,
1155 * it needs to use QINCREF() before calling bdrv_file_open.
1157 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1159 char *backing_filename = g_malloc0(PATH_MAX);
1161 BlockDriverState *backing_hd;
1162 Error *local_err = NULL;
1164 if (bs->backing_hd != NULL) {
1169 /* NULL means an empty set of options */
1170 if (options == NULL) {
1171 options = qdict_new();
1174 bs->open_flags &= ~BDRV_O_NO_BACKING;
1175 if (qdict_haskey(options, "file.filename")) {
1176 backing_filename[0] = '\0';
1177 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1181 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1185 error_propagate(errp, local_err);
1191 if (!bs->drv || !bs->drv->supports_backing) {
1193 error_setg(errp, "Driver doesn't support backing files");
1198 backing_hd = bdrv_new();
1200 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1201 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1204 assert(bs->backing_hd == NULL);
1205 ret = bdrv_open_inherit(&backing_hd,
1206 *backing_filename ? backing_filename : NULL,
1207 NULL, options, 0, bs, &child_backing, &local_err);
1209 bdrv_unref(backing_hd);
1211 bs->open_flags |= BDRV_O_NO_BACKING;
1212 error_setg(errp, "Could not open backing file: %s",
1213 error_get_pretty(local_err));
1214 error_free(local_err);
1218 bdrv_set_backing_hd(bs, backing_hd);
1221 g_free(backing_filename);
1226 * Opens a disk image whose options are given as BlockdevRef in another block
1229 * If allow_none is true, no image will be opened if filename is false and no
1230 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1232 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1233 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1234 * itself, all options starting with "${bdref_key}." are considered part of the
1237 * The BlockdevRef will be removed from the options QDict.
1239 BdrvChild *bdrv_open_child(const char *filename,
1240 QDict *options, const char *bdref_key,
1241 BlockDriverState* parent,
1242 const BdrvChildRole *child_role,
1243 bool allow_none, Error **errp)
1245 BdrvChild *c = NULL;
1246 BlockDriverState *bs;
1247 QDict *image_options;
1249 char *bdref_key_dot;
1250 const char *reference;
1252 assert(child_role != NULL);
1254 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1255 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1256 g_free(bdref_key_dot);
1258 reference = qdict_get_try_str(options, bdref_key);
1259 if (!filename && !reference && !qdict_size(image_options)) {
1261 error_setg(errp, "A block device must be specified for \"%s\"",
1264 QDECREF(image_options);
1269 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1270 parent, child_role, errp);
1275 c = bdrv_attach_child(parent, bs, child_role);
1278 qdict_del(options, bdref_key);
1282 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1284 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1285 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1287 QemuOpts *opts = NULL;
1288 QDict *snapshot_options;
1289 BlockDriverState *bs_snapshot;
1290 Error *local_err = NULL;
1293 /* if snapshot, we create a temporary backing file and open it
1294 instead of opening 'filename' directly */
1296 /* Get the required size from the image */
1297 total_size = bdrv_getlength(bs);
1298 if (total_size < 0) {
1300 error_setg_errno(errp, -total_size, "Could not get image size");
1304 /* Create the temporary image */
1305 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1307 error_setg_errno(errp, -ret, "Could not get temporary filename");
1311 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1313 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1314 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1315 qemu_opts_del(opts);
1317 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1318 "'%s': %s", tmp_filename,
1319 error_get_pretty(local_err));
1320 error_free(local_err);
1324 /* Prepare a new options QDict for the temporary file */
1325 snapshot_options = qdict_new();
1326 qdict_put(snapshot_options, "file.driver",
1327 qstring_from_str("file"));
1328 qdict_put(snapshot_options, "file.filename",
1329 qstring_from_str(tmp_filename));
1330 qdict_put(snapshot_options, "driver",
1331 qstring_from_str("qcow2"));
1333 bs_snapshot = bdrv_new();
1335 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1338 error_propagate(errp, local_err);
1342 bdrv_append(bs_snapshot, bs);
1345 g_free(tmp_filename);
1350 * Opens a disk image (raw, qcow2, vmdk, ...)
1352 * options is a QDict of options to pass to the block drivers, or NULL for an
1353 * empty set of options. The reference to the QDict belongs to the block layer
1354 * after the call (even on failure), so if the caller intends to reuse the
1355 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1357 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1358 * If it is not NULL, the referenced BDS will be reused.
1360 * The reference parameter may be used to specify an existing block device which
1361 * should be opened. If specified, neither options nor a filename may be given,
1362 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
/*
 * Common implementation behind bdrv_open(): either resolves @reference to an
 * already existing node via bdrv_lookup_bs(), or opens a new image by
 * filling in the options, selecting/probing the format driver, opening the
 * "file" child, calling bdrv_open_common(), opening the backing file and,
 * for snapshot flags, appending a temporary snapshot.  Any option left in
 * the dictionary afterwards is reported as unsupported.
 *
 * @parent and @child_role are either both given or both NULL, and explicit
 * @flags may not be combined with a child role (see the asserts below).
 *
 * NOTE(review): several short lines (braces, returns, labels) are not
 * visible in this listing; the comments only describe what is shown.
 */
1364 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1365 const char *reference, QDict *options, int flags,
1366 BlockDriverState *parent,
1367 const BdrvChildRole *child_role, Error **errp)
1370 BdrvChild *file = NULL;
1371 BlockDriverState *bs;
1372 BlockDriver *drv = NULL;
1373 const char *drvname;
1374 Error *local_err = NULL;
1375 int snapshot_flags = 0;
/* Flags are inherited through the child role, so they must not be passed
 * explicitly together with one; a role is present iff a parent is. */
1378 assert(!child_role || !flags);
1379 assert(!child_role == !parent);
/* Reference path: an existing block device is looked up by name; this
 * excludes reusing *pbs, a filename, or a non-empty options dict. */
1382 bool options_non_empty = options ? qdict_size(options) : false;
1386 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1387 "another block device");
1391 if (filename || options_non_empty) {
1392 error_setg(errp, "Cannot reference an existing block device with "
1393 "additional options or a new filename");
1397 bs = bdrv_lookup_bs(reference, reference, errp);
1412 /* NULL means an empty set of options */
1413 if (options == NULL) {
1414 options = qdict_new();
/* Remember the parent and derive the open flags from the parent's flags
 * through the child role's inherit_flags() callback. */
1418 bs->inherits_from = parent;
1419 flags = child_role->inherit_flags(parent->open_flags);
1422 ret = bdrv_fill_options(&options, &filename, &flags, &local_err);
1427 /* Find the right image format driver */
1428 drvname = qdict_get_try_str(options, "driver");
1430 drv = bdrv_find_format(drvname);
1431 qdict_del(options, "driver");
1433 error_setg(errp, "Unknown driver: '%s'", drvname);
1439 assert(drvname || !(flags & BDRV_O_PROTOCOL));
/* bs keeps the dictionary as passed in; the rest of the function works on a
 * shallow clone from which handled options are removed. */
1441 bs->open_flags = flags;
1442 bs->options = options;
1443 options = qdict_clone_shallow(options);
1445 /* Open image file without format layer */
1446 if ((flags & BDRV_O_PROTOCOL) == 0) {
1447 if (flags & BDRV_O_RDWR) {
1448 flags |= BDRV_O_ALLOW_RDWR;
1450 if (flags & BDRV_O_SNAPSHOT) {
1451 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1452 flags = bdrv_backing_flags(flags);
1455 bs->open_flags = flags;
1457 file = bdrv_open_child(filename, options, "file", bs,
1458 &child_file, true, &local_err);
1465 /* Image format probing */
1468 ret = find_image_format(file->bs, filename, &drv, &local_err);
1473 error_setg(errp, "Must specify either driver or file");
1478 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1479 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1480 /* file must be NULL if a protocol BDS is about to be created
1481 * (the inverse results in an error message from bdrv_open_common()) */
1482 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1484 /* Open the image */
1485 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
/* Release the locally opened child when bs->file ended up being a
 * different child (or none). */
1490 if (file && (bs->file != file)) {
1491 bdrv_unref_child(bs, file);
1495 /* If there is a backing file, use it */
1496 if ((flags & BDRV_O_NO_BACKING) == 0) {
1497 QDict *backing_options;
1499 qdict_extract_subqdict(options, &backing_options, "backing.");
1500 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1502 goto close_and_fail;
1506 bdrv_refresh_filename(bs);
1508 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1509 * temporary snapshot afterwards. */
1510 if (snapshot_flags) {
1511 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1513 goto close_and_fail;
1517 /* Check if any unknown options were used */
1518 if (options && (qdict_size(options) != 0)) {
1519 const QDictEntry *entry = qdict_first(options);
1520 if (flags & BDRV_O_PROTOCOL) {
1521 error_setg(errp, "Block protocol '%s' doesn't support the option "
1522 "'%s'", drv->format_name, entry->key);
1524 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1525 "support the option '%s'", drv->format_name,
1526 bdrv_get_device_name(bs), entry->key);
1530 goto close_and_fail;
/* Without a pending key the media-change callback fires right away;
 * otherwise opening is only allowed in the run states checked below. */
1533 if (!bdrv_key_required(bs)) {
1535 blk_dev_change_media_cb(bs->blk, true);
1537 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1538 && !runstate_check(RUN_STATE_INMIGRATE)
1539 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1541 "Guest must be stopped for opening of encrypted image");
1543 goto close_and_fail;
1552 bdrv_unref_child(bs, file);
1554 QDECREF(bs->options);
1558 /* If *pbs is NULL, a new BDS has been created in this function and
1559 needs to be freed now. Otherwise, it does not need to be closed,
1560 since it has not really been opened yet. */
1564 error_propagate(errp, local_err);
1569 /* See fail path, but now the BDS has to be always closed */
1577 error_propagate(errp, local_err);
1582 int bdrv_open(BlockDriverState **pbs, const char *filename,
1583 const char *reference, QDict *options, int flags, Error **errp)
1585 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
/* One element of a BlockReopenQueue: the staged reopen state of a single
 * BlockDriverState together with its linkage into the simple queue.
 * NOTE(review): bdrv_reopen_multiple() also uses a 'prepared' member whose
 * declaration is not visible in this listing. */
1589 typedef struct BlockReopenQueueEntry {
1591 BDRVReopenState state;
1592 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1593 } BlockReopenQueueEntry;
1596 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1597 * reopen of multiple devices.
1599 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1600 * already performed, or alternatively may be NULL, in which case a new BlockReopenQueue will
1601 * be created and initialized. This newly created BlockReopenQueue should be
1602 * passed back in for subsequent calls that are intended to be of the same
1605 * bs is the BlockDriverState to add to the reopen queue.
1607 * options contains the changed options for the associated bs
1608 * (the BlockReopenQueue takes ownership)
1610 * flags contains the open flags for the associated bs
1612 * returns a pointer to bs_queue, which is either the newly allocated
1613 * bs_queue, or the existing bs_queue being used.
/*
 * Queues @bs, and recursively those of its children that inherit from it,
 * for a later transactional reopen.  Children are queued before the entry
 * for @bs itself is appended.
 */
1616 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1617 BlockDriverState *bs,
1618 QDict *options, int flags)
1622 BlockReopenQueueEntry *bs_entry;
/* Lazily create the queue on the first call of a transaction. */
1626 if (bs_queue == NULL) {
1627 bs_queue = g_new0(BlockReopenQueue, 1);
1628 QSIMPLEQ_INIT(bs_queue);
1632 options = qdict_new();
/* Join the caller's options with a shallow clone of the options the node
 * currently has; the clone is released again afterwards. */
1635 old_options = qdict_clone_shallow(bs->options);
1636 qdict_join(options, old_options, false);
1637 QDECREF(old_options);
1639 /* bdrv_open() masks this flag out */
1640 flags &= ~BDRV_O_PROTOCOL;
/* Only children whose inherits_from points back at bs are considered; their
 * flags are derived from ours through the child role. */
1642 QLIST_FOREACH(child, &bs->children, next) {
1645 if (child->bs->inherits_from != bs) {
1649 child_flags = child->role->inherit_flags(flags);
1650 /* TODO Pass down child flags (backing.*, extents.*, ...) */
1651 bdrv_reopen_queue(bs_queue, child->bs, NULL, child_flags);
/* Finally append the entry for bs itself; it records the node, the joined
 * options and the (masked) flags. */
1654 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1655 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1657 bs_entry->state.bs = bs;
1658 bs_entry->state.options = options;
1659 bs_entry->state.flags = flags;
1665 * Reopen multiple BlockDriverStates atomically & transactionally.
1667 * The queue passed in (bs_queue) must have been built up previously
1668 * via bdrv_reopen_queue().
1670 * Reopens all BDS specified in the queue, with the appropriate
1671 * flags. All devices are prepared for reopen, and failure of any
1672 * device will cause all device changes to be abandoned, and intermediate
1675 * If all devices prepare successfully, then the changes are committed
/*
 * Two-phase reopen of every entry in @bs_queue: all entries are prepared
 * first and, only if that succeeds, all are committed.  In the final loop
 * entries that were prepared are aborted when ret is non-zero, and each
 * entry's options reference is dropped.
 */
1679 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1682 BlockReopenQueueEntry *bs_entry, *next;
1683 Error *local_err = NULL;
1685 assert(bs_queue != NULL);
/* Phase 1: prepare each entry and remember which ones succeeded, so that
 * only those are aborted on failure. */
1689 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1690 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1691 error_propagate(errp, local_err);
1694 bs_entry->prepared = true;
1697 /* If we reach this point, we have success and just need to apply the
1700 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1701 bdrv_reopen_commit(&bs_entry->state);
1707 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1708 if (ret && bs_entry->prepared) {
1709 bdrv_reopen_abort(&bs_entry->state);
1711 QDECREF(bs_entry->state.options);
1719 /* Reopen a single BlockDriverState with the specified flags. */
1720 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1723 Error *local_err = NULL;
1724 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1726 ret = bdrv_reopen_multiple(queue, &local_err);
1727 if (local_err != NULL) {
1728 error_propagate(errp, local_err);
1735 * Prepares a BlockDriverState for reopen. All changes are staged in the
1736 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1737 * the block driver layer .bdrv_reopen_prepare()
1739 * bs is the BlockDriverState to reopen
1740 * flags are the new open flags
1741 * queue is the reopen queue
1743 * Returns 0 on success, non-zero on error. On error errp will be set
1746 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1747 * It is the responsibility of the caller to then call the abort() or
1748 * commit() for any other BDS that have been left in a prepare() state
/*
 * Validation and staging step of a reopen.  In order, the visible code:
 * rejects switching to BDRV_O_RDWR when the node was not opened with
 * BDRV_O_ALLOW_RDWR, flushes the node, calls the driver's
 * .bdrv_reopen_prepare() (a missing handler is an error), and finally
 * requires every option left in reopen_state->options to be textually equal
 * to the option of the same name currently stored on the node.
 */
1751 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1755 Error *local_err = NULL;
1758 assert(reopen_state != NULL);
1759 assert(reopen_state->bs->drv != NULL);
1760 drv = reopen_state->bs->drv;
1762 /* if we are to stay read-only, do not allow permission change
1764 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1765 reopen_state->flags & BDRV_O_RDWR) {
1766 error_setg(errp, "Node '%s' is read only",
1767 bdrv_get_device_or_node_name(reopen_state->bs));
1772 ret = bdrv_flush(reopen_state->bs);
1774 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1779 if (drv->bdrv_reopen_prepare) {
1780 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1782 if (local_err != NULL) {
1783 error_propagate(errp, local_err);
1785 error_setg(errp, "failed while preparing to reopen image '%s'",
1786 reopen_state->bs->filename);
1791 /* It is currently mandatory to have a bdrv_reopen_prepare()
1792 * handler for each supported drv. */
1793 error_setg(errp, "Block format '%s' used by node '%s' "
1794 "does not support reopening files", drv->format_name,
1795 bdrv_get_device_or_node_name(reopen_state->bs));
1800 /* Options that are not handled are only okay if they are unchanged
1801 * compared to the old state. It is expected that some options are only
1802 * used for the initial open, but not reopen (e.g. filename) */
1803 if (qdict_size(reopen_state->options)) {
1804 const QDictEntry *entry = qdict_first(reopen_state->options);
/* NOTE(review): this treats every remaining option value as a QString;
 * confirm qobject_to_qstring() cannot yield NULL for non-string values
 * before the result is passed to qstring_get_str(). */
1807 QString *new_obj = qobject_to_qstring(entry->value);
1808 const char *new = qstring_get_str(new_obj);
1809 const char *old = qdict_get_try_str(reopen_state->bs->options,
/* An option absent from the old state counts as changed, too. */
1812 if (!old || strcmp(new, old)) {
1813 error_setg(errp, "Cannot change the option '%s'", entry->key);
1817 } while ((entry = qdict_next(reopen_state->options, entry)));
1827 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1828 * makes them final by swapping the staging BlockDriverState contents into
1829 * the active BlockDriverState contents.
1831 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1835 assert(reopen_state != NULL);
1836 drv = reopen_state->bs->drv;
1837 assert(drv != NULL);
1839 /* If there are any driver level actions to take */
1840 if (drv->bdrv_reopen_commit) {
1841 drv->bdrv_reopen_commit(reopen_state);
1844 /* set BDS specific flags now */
1845 reopen_state->bs->open_flags = reopen_state->flags;
1846 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1848 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1850 bdrv_refresh_limits(reopen_state->bs, NULL);
1854 * Abort the reopen, and delete and free the staged changes in
1857 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1861 assert(reopen_state != NULL);
1862 drv = reopen_state->bs->drv;
1863 assert(drv != NULL);
1865 if (drv->bdrv_reopen_abort) {
1866 drv->bdrv_reopen_abort(reopen_state);
/*
 * Closes @bs: cancels its block job, disables I/O throttling, drains
 * pending I/O, notifies the close notifiers, lets the driver close the
 * image, detaches backing file / file / remaining children and resets the
 * per-image fields shown below.
 *
 * NOTE(review): some statements (e.g. conditions and the call between the
 * two drains) are not visible in this listing.
 */
1871 void bdrv_close(BlockDriverState *bs)
1873 BdrvAioNotifier *ban, *ban_next;
1876 block_job_cancel_sync(bs->job);
1879 /* Disable I/O limits and drain all pending throttled requests */
1880 if (bs->io_limits_enabled) {
1881 bdrv_io_limits_disable(bs);
1884 bdrv_drain(bs); /* complete I/O */
1886 bdrv_drain(bs); /* in case flush left pending I/O */
1887 notifier_list_notify(&bs->close_notifiers, bs);
1890 BdrvChild *child, *next;
1892 bs->drv->bdrv_close(bs);
/* Detach the backing file before dropping the reference held on it. */
1895 if (bs->backing_hd) {
1896 BlockDriverState *backing_hd = bs->backing_hd;
1897 bdrv_set_backing_hd(bs, NULL);
1898 bdrv_unref(backing_hd);
1901 if (bs->file != NULL) {
1902 bdrv_unref_child(bs, bs->file);
/* Remaining children are only detached (not unreferenced) here; any
 * inheritance link back to bs is cleared first. */
1906 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1907 /* TODO Remove bdrv_unref() from drivers' close function and use
1908 * bdrv_unref_child() here */
1909 if (child->bs->inherits_from == bs) {
1910 child->bs->inherits_from = NULL;
1912 bdrv_detach_child(child);
/* Reset per-image state so the BDS no longer describes the closed image. */
1917 bs->copy_on_read = 0;
1918 bs->backing_file[0] = '\0';
1919 bs->backing_format[0] = '\0';
1920 bs->total_sectors = 0;
1924 bs->zero_beyond_eof = false;
1925 QDECREF(bs->options);
1927 QDECREF(bs->full_open_options);
1928 bs->full_open_options = NULL;
1932 blk_dev_change_media_cb(bs->blk, false);
/* Walk the AioContext notifiers with the _SAFE iterator, then leave the
 * list empty. */
1935 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1938 QLIST_INIT(&bs->aio_notifiers);
/*
 * Iterates over every BlockDriverState in bdrv_states, holding the node's
 * AioContext for the duration of the per-node work.
 * NOTE(review): the statement executed between acquire and release is not
 * visible in this listing (presumably the close itself).
 */
1941 void bdrv_close_all(void)
1943 BlockDriverState *bs;
1945 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1946 AioContext *aio_context = bdrv_get_aio_context(bs);
1948 aio_context_acquire(aio_context);
1950 aio_context_release(aio_context);
1954 /* make a BlockDriverState anonymous by removing from bdrv_state and
1955 * graph_bdrv_state list.
1956 Also, NULL terminate the device_name to prevent double remove */
/* Both removals are guarded, so the function can be called on a BDS that is
 * in neither list; the node name is emptied at the end, which makes a
 * repeated call skip the graph list removal. */
1957 void bdrv_make_anon(BlockDriverState *bs)
1960 * Take care to remove bs from bdrv_states only when it's actually
1961 * in it. Note that bs->device_list.tqe_prev is initially null,
1962 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1963 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1964 * resetting it to null on remove.
1966 if (bs->device_list.tqe_prev) {
1967 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1968 bs->device_list.tqe_prev = NULL;
/* A non-empty node name is what marks membership in graph_bdrv_states. */
1970 if (bs->node_name[0] != '\0') {
1971 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1973 bs->node_name[0] = '\0';
1976 static void bdrv_rebind(BlockDriverState *bs)
1978 if (bs->drv && bs->drv->bdrv_rebind) {
1979 bs->drv->bdrv_rebind(bs);
1983 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1984 BlockDriverState *bs_src)
1986 /* move some fields that need to stay attached to the device */
1989 bs_dest->guest_block_size = bs_src->guest_block_size;
1990 bs_dest->copy_on_read = bs_src->copy_on_read;
1992 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1994 /* i/o throttled req */
1995 bs_dest->throttle_state = bs_src->throttle_state,
1996 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1997 bs_dest->pending_reqs[0] = bs_src->pending_reqs[0];
1998 bs_dest->pending_reqs[1] = bs_src->pending_reqs[1];
1999 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
2000 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
2001 memcpy(&bs_dest->round_robin,
2002 &bs_src->round_robin,
2003 sizeof(bs_dest->round_robin));
2004 memcpy(&bs_dest->throttle_timers,
2005 &bs_src->throttle_timers,
2006 sizeof(ThrottleTimers));
2009 bs_dest->on_read_error = bs_src->on_read_error;
2010 bs_dest->on_write_error = bs_src->on_write_error;
2013 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
2014 bs_dest->iostatus = bs_src->iostatus;
2017 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
2019 /* reference count */
2020 bs_dest->refcnt = bs_src->refcnt;
2023 bs_dest->job = bs_src->job;
2025 /* keep the same entry in bdrv_states */
2026 bs_dest->device_list = bs_src->device_list;
2027 bs_dest->blk = bs_src->blk;
2029 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2030 sizeof(bs_dest->op_blockers));
2034 * Swap bs contents for two image chains while they are live,
2035 * while keeping required fields on the BlockDriverState that is
2036 * actually attached to a device.
2038 * This will modify the BlockDriverState fields, and swap contents
2039 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2041 * bs_new must not be attached to a BlockBackend.
2043 * This function does not create any image files.
/*
 * Implementation outline as visible below: both nodes are taken out of
 * graph_bdrv_states, the throttle group lock of bs_old is taken if it
 * belongs to a group, the device-attached fields are moved back through a
 * temporary with three bdrv_move_feature_fields() calls, the nodes are
 * re-inserted, list heads and inherits_from pointers of the children are
 * fixed up, and both drivers are asked to rebind.
 *
 * NOTE(review): the statements performing the actual content exchange are
 * not visible in this listing.
 */
2045 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2047 BlockDriverState tmp;
2053 /* The code needs to swap the node_name but simply swapping node_list won't
2054 * work so first remove the nodes from the graph list, do the swap then
2055 * insert them back if needed.
2057 if (bs_new->node_name[0] != '\0') {
2058 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2060 if (bs_old->node_name[0] != '\0') {
2061 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2064 /* If the BlockDriverState is part of a throttling group acquire
2065 * its lock since we're going to mess with the protected fields.
2066 * Otherwise there's no need to worry since no one else can touch
2068 if (bs_old->throttle_state) {
2069 throttle_group_lock(bs_old);
2072 /* bs_new must be unattached and shouldn't have anything fancy enabled */
2073 assert(!bs_new->blk);
2074 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
2075 assert(bs_new->job == NULL);
2076 assert(bs_new->io_limits_enabled == false);
2077 assert(bs_new->throttle_state == NULL);
2078 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
2084 /* there are some fields that should not be swapped, move them back */
/* Classic three-step exchange of the feature fields via 'tmp'. */
2085 bdrv_move_feature_fields(&tmp, bs_old);
2086 bdrv_move_feature_fields(bs_old, bs_new);
2087 bdrv_move_feature_fields(bs_new, &tmp);
2089 /* bs_new must remain unattached */
2090 assert(!bs_new->blk);
2092 /* Check a few fields that should remain attached to the device */
2093 assert(bs_new->job == NULL);
2094 assert(bs_new->io_limits_enabled == false);
2095 assert(bs_new->throttle_state == NULL);
2096 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
2098 /* Release the ThrottleGroup lock */
2099 if (bs_old->throttle_state) {
2100 throttle_group_unlock(bs_old);
2103 /* insert the nodes back into the graph node list if needed */
2104 if (bs_new->node_name[0] != '\0') {
2105 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2107 if (bs_old->node_name[0] != '\0') {
2108 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2112 * Update lh_first.le_prev for non-empty lists.
2114 * The head of the op blocker list doesn't change because it is moved back
2115 * in bdrv_move_feature_fields().
2117 assert(QLIST_EMPTY(&bs_old->tracked_requests));
2118 assert(QLIST_EMPTY(&bs_new->tracked_requests));
2120 QLIST_FIX_HEAD_PTR(&bs_new->children, next);
2121 QLIST_FIX_HEAD_PTR(&bs_old->children, next);
2123 /* Update references in bs->opaque and children */
/* Children now hanging off bs_old still name bs_new as the node they
 * inherit from (and vice versa); point them at their new owner. */
2124 QLIST_FOREACH(child, &bs_old->children, next) {
2125 if (child->bs->inherits_from == bs_new) {
2126 child->bs->inherits_from = bs_old;
2129 QLIST_FOREACH(child, &bs_new->children, next) {
2130 if (child->bs->inherits_from == bs_old) {
2131 child->bs->inherits_from = bs_new;
2135 bdrv_rebind(bs_new);
2136 bdrv_rebind(bs_old);
2140 * Add new bs contents at the top of an image chain while the chain is
2141 * live, while keeping required fields on the top layer.
2143 * This will modify the BlockDriverState fields, and swap contents
2144 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2146 * bs_new must not be attached to a BlockBackend.
2148 * This function does not create any image files.
2150 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2152 bdrv_swap(bs_new, bs_top);
2154 /* The contents of 'tmp' will become bs_top, as we are
2155 * swapping bs_new and bs_top contents. */
2156 bdrv_set_backing_hd(bs_top, bs_new);
/*
 * Final teardown of a BlockDriverState.  The asserts spell out the
 * preconditions: no op blockers, a reference count of zero and no dirty
 * bitmaps left on @bs.
 * NOTE(review): the teardown statements themselves are not visible in this
 * listing.
 */
2159 static void bdrv_delete(BlockDriverState *bs)
2162 assert(bdrv_op_blocker_is_empty(bs));
2163 assert(!bs->refcnt);
2164 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2168 /* remove from list, if necessary */
2175 * Run consistency checks on an image
2177 * Returns 0 if the check could be completed (it doesn't mean that the image is
2178 * free of errors) or -errno when an internal error occurred. The results of the
2179 * check are stored in res.
/*
 * Dispatches to the driver's .bdrv_check() after zeroing @res.  A node
 * without a driver, or a driver without .bdrv_check, is handled by the two
 * guards below (their return statements are not visible in this listing).
 */
2181 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2183 if (bs->drv == NULL) {
2186 if (bs->drv->bdrv_check == NULL) {
/* Start from a clean result so the driver only has to fill in findings. */
2190 memset(res, 0, sizeof(*res));
2191 return bs->drv->bdrv_check(bs, res, fix);
/* Number of sectors copied per iteration by bdrv_commit(); also the size
 * (times BDRV_SECTOR_SIZE) of its bounce buffer. */
2194 #define COMMIT_BUF_SECTORS 2048
2196 /* commit COW file into the raw image */
/*
 * Visible steps: refuse when there is no backing file or the commit is
 * blocked on either node, reopen the backing file with BDRV_O_RDWR, grow it
 * if the top image is longer, copy data from bs to the backing file in
 * chunks of up to COMMIT_BUF_SECTORS, optionally let the driver empty bs,
 * flush the backing file, and finally reopen the backing file without
 * BDRV_O_RDWR.
 *
 * NOTE(review): error checks, labels and conditions between these steps are
 * not visible in this listing.
 */
2197 int bdrv_commit(BlockDriverState *bs)
2199 BlockDriver *drv = bs->drv;
2200 int64_t sector, total_sectors, length, backing_length;
2201 int n, ro, open_flags;
2203 uint8_t *buf = NULL;
2208 if (!bs->backing_hd) {
2212 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2213 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
/* Remember the backing file's read-only state and flags so they can be
 * used when reopening it. */
2217 ro = bs->backing_hd->read_only;
2218 open_flags = bs->backing_hd->open_flags;
2221 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2226 length = bdrv_getlength(bs);
2232 backing_length = bdrv_getlength(bs->backing_hd);
2233 if (backing_length < 0) {
2234 ret = backing_length;
2238 /* If our top snapshot is larger than the backing file image,
2239 * grow the backing file image if possible. If not possible,
2240 * we must return an error */
2241 if (length > backing_length) {
2242 ret = bdrv_truncate(bs->backing_hd, length);
2248 total_sectors = length >> BDRV_SECTOR_BITS;
2250 /* qemu_try_blockalign() for bs will choose an alignment that works for
2251 * bs->backing_hd as well, so no need to compare the alignment manually. */
2252 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
/* bdrv_is_allocated() stores in n how many sectors share the reported
 * allocation status, so the loop advances by n each round. */
2258 for (sector = 0; sector < total_sectors; sector += n) {
2259 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2264 ret = bdrv_read(bs, sector, buf, n);
2269 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2276 if (drv->bdrv_make_empty) {
2277 ret = drv->bdrv_make_empty(bs);
2285 * Make sure all data we wrote to the backing device is actually
2288 if (bs->backing_hd) {
2289 bdrv_flush(bs->backing_hd);
2297 /* ignoring error return here */
2298 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
/*
 * Runs bdrv_commit() on every entry of bdrv_states that has both a driver
 * and a backing file, holding the node's AioContext around each commit.
 * The context is released on both the path following the commit result
 * check and the normal end of the iteration.
 */
2304 int bdrv_commit_all(void)
2306 BlockDriverState *bs;
2308 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2309 AioContext *aio_context = bdrv_get_aio_context(bs);
2311 aio_context_acquire(aio_context);
2312 if (bs->drv && bs->backing_hd) {
2313 int ret = bdrv_commit(bs);
2315 aio_context_release(aio_context);
2319 aio_context_release(aio_context);
2327 * -EINVAL - backing format specified, but no file
2328 * -ENOSPC - can't update the backing file because no space is left in the
2330 * -ENOTSUP - format driver doesn't support changing the backing file
/*
 * Asks the driver to record a new backing file (and format) for @bs and
 * mirrors the new names into bs->backing_file / bs->backing_format.  NULL
 * names are stored as empty strings.
 */
2332 int bdrv_change_backing_file(BlockDriverState *bs,
2333 const char *backing_file, const char *backing_fmt)
2335 BlockDriver *drv = bs->drv;
2338 /* Backing file format doesn't make sense without a backing file */
2339 if (backing_fmt && !backing_file) {
2343 if (drv->bdrv_change_backing_file != NULL) {
2344 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
/* pstrcpy() is given the destination size, bounding the copy. */
2350 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2351 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2357 * Finds the image layer in the chain that has 'bs' as its backing file.
2359 * active is the current topmost image.
2361 * Returns NULL if bs is not found in active's image chain,
2362 * or if active == bs.
2364 * Returns the bottommost base image if bs == NULL.
2366 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2367 BlockDriverState *bs)
2369 while (active && bs != active->backing_hd) {
2370 active = active->backing_hd;
2376 /* Given a BDS, searches for the base layer. */
2377 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2379 return bdrv_find_overlay(bs, NULL);
/* Queue element used by bdrv_drop_intermediate() to remember each image
 * that is going to be dropped from the chain. */
2382 typedef struct BlkIntermediateStates {
2383 BlockDriverState *bs;
2384 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2385 } BlkIntermediateStates;
2389 * Drops images above 'base' up to and including 'top', and sets the image
2390 * above 'top' to have base as its backing file.
2392 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2393 * information in 'bs' can be properly updated.
2395 * E.g., this will convert the following chain:
2396 * bottom <- base <- intermediate <- top <- active
2400 * bottom <- base <- active
2402 * It is allowed for bottom==base, in which case it converts:
2404 * base <- intermediate <- top <- active
2410 * If backing_file_str is non-NULL, it will be used when modifying top's
2411 * overlay image metadata.
2414 * if active == top, that is considered an error
/*
 * Implementation outline as visible below: locate the overlay of @top,
 * collect every image on the way down to @base in a local queue, rewrite
 * the overlay's backing file reference to @base (or @backing_file_str),
 * re-link the overlay to the base, then unlink and unref each collected
 * image.  The queue elements are freed at the end.
 */
2417 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2418 BlockDriverState *base, const char *backing_file_str)
2420 BlockDriverState *intermediate;
2421 BlockDriverState *base_bs = NULL;
2422 BlockDriverState *new_top_bs = NULL;
2423 BlkIntermediateStates *intermediate_state, *next;
2426 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2427 QSIMPLEQ_INIT(&states_to_delete);
2429 if (!top->drv || !base->drv) {
/* new_top_bs is the image directly above 'top'; it will end up pointing at
 * 'base'. */
2433 new_top_bs = bdrv_find_overlay(active, top);
2435 if (new_top_bs == NULL) {
2436 /* we could not find the image above 'top', this is an error */
2440 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2441 * to do, no intermediate images */
2442 if (new_top_bs->backing_hd == base) {
2449 /* now we will go down through the list, and add each BDS we find
2450 * into our deletion queue, until we hit the 'base'
2452 while (intermediate) {
2453 intermediate_state = g_new0(BlkIntermediateStates, 1);
2454 intermediate_state->bs = intermediate;
2455 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2457 if (intermediate->backing_hd == base) {
2458 base_bs = intermediate->backing_hd;
2461 intermediate = intermediate->backing_hd;
2463 if (base_bs == NULL) {
2464 /* something went wrong, we did not end at the base. safely
2465 * unravel everything, and exit with error */
2469 /* success - we can delete the intermediate states, and link top->base */
/* Default to the base image's own filename when the caller gave no string
 * to write into the overlay's metadata. */
2470 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2471 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2472 base_bs->drv ? base_bs->drv->format_name : "");
2476 bdrv_set_backing_hd(new_top_bs, base_bs);
2478 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2479 /* so that bdrv_close() does not recursively close the chain */
2480 bdrv_set_backing_hd(intermediate_state->bs, NULL);
2481 bdrv_unref(intermediate_state->bs);
/* Free the bookkeeping elements; _SAFE is needed because each element is
 * released while iterating. */
2486 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2487 g_free(intermediate_state);
2493 * Truncate file to 'offset' bytes (needed only for file protocols)
/*
 * Resizes @bs through the driver's .bdrv_truncate() and then refreshes the
 * cached sector count, truncates the dirty bitmaps and invokes the
 * BlockBackend resize callback.
 * NOTE(review): the guards and the condition under which the follow-up
 * steps run are only partly visible in this listing.
 */
2495 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2497 BlockDriver *drv = bs->drv;
2501 if (!drv->bdrv_truncate)
2506 ret = drv->bdrv_truncate(bs, offset);
/* offset is in bytes; the cached size is kept in sectors. */
2508 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2509 bdrv_dirty_bitmap_truncate(bs);
2511 blk_dev_resize_cb(bs->blk);
2518 * Length of an allocated file in bytes. Sparse files are counted by actual
2519 * allocated space. Return < 0 if error or unknown.
/*
 * Prefers the driver's own .bdrv_get_allocated_file_size(); otherwise the
 * question is forwarded recursively to the node behind bs->file.
 * NOTE(review): the guards around these two returns are not visible here.
 */
2521 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2523 BlockDriver *drv = bs->drv;
2527 if (drv->bdrv_get_allocated_file_size) {
2528 return drv->bdrv_get_allocated_file_size(bs);
2531 return bdrv_get_allocated_file_size(bs->file->bs);
2537 * Return number of sectors on success, -errno on error.
/*
 * Returns the cached bs->total_sectors.  For drivers flagged with
 * has_variable_length the cached value is refreshed first.
 */
2539 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2541 BlockDriver *drv = bs->drv;
2546 if (drv->has_variable_length) {
/* The current value is passed along as the hint argument. */
2547 int ret = refresh_total_sectors(bs, bs->total_sectors);
2552 return bs->total_sectors;
2556 * Return length in bytes on success, -errno on error.
2557 * The length is always a multiple of BDRV_SECTOR_SIZE.
2559 int64_t bdrv_getlength(BlockDriverState *bs)
2561 int64_t ret = bdrv_nb_sectors(bs);
2563 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2564 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2567 /* return 0 as number of sectors if no device present or error */
2568 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2570 int64_t nb_sectors = bdrv_nb_sectors(bs);
2572 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2575 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2576 BlockdevOnError on_write_error)
2578 bs->on_read_error = on_read_error;
2579 bs->on_write_error = on_write_error;
2582 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2584 return is_read ? bs->on_read_error : bs->on_write_error;
/*
 * Maps the configured error policy for the failed direction to the action
 * to take: the ENOSPC policy stops only when @error is ENOSPC and reports
 * otherwise; the stop/report/ignore policies map to the action of the same
 * name.
 * NOTE(review): the switch header and any default case are not visible in
 * this listing.
 */
2587 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2589 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2592 case BLOCKDEV_ON_ERROR_ENOSPC:
2593 return (error == ENOSPC) ?
2594 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2595 case BLOCKDEV_ON_ERROR_STOP:
2596 return BLOCK_ERROR_ACTION_STOP;
2597 case BLOCKDEV_ON_ERROR_REPORT:
2598 return BLOCK_ERROR_ACTION_REPORT;
2599 case BLOCKDEV_ON_ERROR_IGNORE:
2600 return BLOCK_ERROR_ACTION_IGNORE;
/*
 * Emits the block I/O error event for @bs, carrying the device name, the
 * operation type derived from @is_read, the chosen @action, whether
 * iostatus tracking is enabled, whether the error is ENOSPC, and the
 * strerror() text of @error.
 * NOTE(review): the final argument of the event call is not visible here.
 */
2606 static void send_qmp_error_event(BlockDriverState *bs,
2607 BlockErrorAction action,
2608 bool is_read, int error)
2610 IoOperationType optype;
2612 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2613 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2614 bdrv_iostatus_is_enabled(bs),
2615 error == ENOSPC, strerror(error),
2619 /* This is done by device models because, while the block layer knows
2620 * about the error, it does not know whether an operation comes from
2621 * the device or the block layer (from a job, for example).
/*
 * Carries out @action for an I/O error on @bs.  For the stop action the
 * iostatus is set, the VM stop request is prepared, the error event is sent
 * and the stop is requested, in that order; for other actions shown here
 * only the error event is sent.
 */
2623 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2624 bool is_read, int error)
2628 if (action == BLOCK_ERROR_ACTION_STOP) {
2629 /* First set the iostatus, so that "info block" returns an iostatus
2630 * that matches the events raised so far (an additional error iostatus
2631 * is fine, but not a lost one).
2633 bdrv_iostatus_set_err(bs, error);
2635 /* Then raise the request to stop the VM and the event.
2636 * qemu_system_vmstop_request_prepare has two effects. First,
2637 * it ensures that the STOP event always comes after the
2638 * BLOCK_IO_ERROR event. Second, it ensures that even if management
2639 * can observe the STOP event and do a "cont" before the STOP
2640 * event is issued, the VM will not stop. In this case, vm_start()
2641 * also ensures that the STOP/RESUME pair of events is emitted.
2643 qemu_system_vmstop_request_prepare();
2644 send_qmp_error_event(bs, action, is_read, error);
2645 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2647 send_qmp_error_event(bs, action, is_read, error);
2651 int bdrv_is_read_only(BlockDriverState *bs)
2653 return bs->read_only;
/* NOTE(review): only the signature is visible in this listing; judging by
 * the name this reports whether @bs is a SCSI generic device -- confirm
 * against the full function body. */
2656 int bdrv_is_sg(BlockDriverState *bs)
2661 int bdrv_enable_write_cache(BlockDriverState *bs)
2663 return bs->enable_write_cache;
2666 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2668 bs->enable_write_cache = wce;
2670 /* so a reopen() will preserve wce */
2672 bs->open_flags |= BDRV_O_CACHE_WB;
2674 bs->open_flags &= ~BDRV_O_CACHE_WB;
2678 int bdrv_is_encrypted(BlockDriverState *bs)
2680 if (bs->backing_hd && bs->backing_hd->encrypted)
2682 return bs->encrypted;
2685 int bdrv_key_required(BlockDriverState *bs)
2687 BlockDriverState *backing_hd = bs->backing_hd;
2689 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2691 return (bs->encrypted && !bs->valid_key);
/*
 * Passes @key to the driver of @bs, after first setting it recursively on
 * an encrypted backing file.  Once the key has been accepted, the
 * media-change callback that was skipped at open time is invoked.
 * NOTE(review): return values and the valid_key updates are not visible in
 * this listing.
 */
2694 int bdrv_set_key(BlockDriverState *bs, const char *key)
2697 if (bs->backing_hd && bs->backing_hd->encrypted) {
2698 ret = bdrv_set_key(bs->backing_hd, key);
2704 if (!bs->encrypted) {
2706 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2709 ret = bs->drv->bdrv_set_key(bs, key);
2712 } else if (!bs->valid_key) {
2715 /* call the change callback now, we skipped it on open */
2716 blk_dev_change_media_cb(bs->blk, true);
2723 * Provide an encryption key for @bs.
2724 * If @key is non-null:
2725 * If @bs is not encrypted, fail.
2726 * Else if the key is invalid, fail.
2727 * Else set @bs's key to @key, replacing the existing key, if any.
2729 * If @bs is encrypted and still lacks a key, fail.
2731 * On failure, store an error object through @errp if non-null.
/*
 * Error-reporting front end for bdrv_set_key(): the three failure cases
 * visible below are "node is not encrypted", "key rejected"
 * (QERR_INVALID_PASSWORD) and "encrypted node still lacks a key"
 * (ERROR_CLASS_DEVICE_ENCRYPTED).
 */
2733 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2736 if (!bdrv_is_encrypted(bs)) {
2737 error_setg(errp, "Node '%s' is not encrypted",
2738 bdrv_get_device_or_node_name(bs));
2739 } else if (bdrv_set_key(bs, key) < 0) {
2740 error_setg(errp, QERR_INVALID_PASSWORD);
2743 if (bdrv_key_required(bs)) {
2744 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2745 "'%s' (%s) is encrypted",
2746 bdrv_get_device_or_node_name(bs),
2747 bdrv_get_encrypted_filename(bs));
2752 const char *bdrv_get_format_name(BlockDriverState *bs)
2754 return bs->drv ? bs->drv->format_name : NULL;
/*
 * qsort() comparator for an array of C strings (const char *elements).
 *
 * qsort() hands the comparator pointers to the array elements, so @a and @b
 * are really 'const char *const *'.  They must be dereferenced once before
 * the strings can be compared; passing them to strcmp() directly would
 * compare the bytes of the pointers instead of the strings.
 *
 * Returns a value less than, equal to or greater than zero, like strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
/*
 * Invokes @it once per distinct format name among the registered drivers.
 * Names are first collected into a growable array (skipping names already
 * present), then the array is passed through qsort() and finally iterated.
 */
2762 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2768 const char **formats = NULL;
2770 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2771 if (drv->format_name) {
/* Linear scan backwards over the names collected so far to detect a
 * duplicate. */
2774 while (formats && i && !found) {
2775 found = !strcmp(formats[--i], drv->format_name);
/* Grow by one slot and append; the array stores the drivers' own strings,
 * not copies. */
2779 formats = g_renew(const char *, formats, count + 1);
2780 formats[count++] = drv->format_name;
2785 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2787 for (i = 0; i < count; i++) {
2788 it(opaque, formats[i]);
2794 /* This function is to find a node in the bs graph */
/* The lookup is a linear walk over graph_bdrv_states comparing @node_name
 * with each node's node_name using strcmp().
 * NOTE(review): the return statements are not visible in this listing. */
2795 BlockDriverState *bdrv_find_node(const char *node_name)
2797 BlockDriverState *bs;
2801 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2802 if (!strcmp(node_name, bs->node_name)) {
2809 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2810 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2812 BlockDeviceInfoList *list, *entry;
2813 BlockDriverState *bs;
2816 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2817 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2819 qapi_free_BlockDeviceInfoList(list);
2822 entry = g_malloc0(sizeof(*entry));
2823 entry->value = info;
2831 BlockDriverState *bdrv_lookup_bs(const char *device,
2832 const char *node_name,
2836 BlockDriverState *bs;
2839 blk = blk_by_name(device);
2847 bs = bdrv_find_node(node_name);
2854 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2855 device ? device : "",
2856 node_name ? node_name : "");
2860 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2861 * return false. If either argument is NULL, return false. */
2862 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2864 while (top && top != base) {
2865 top = top->backing_hd;
2871 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2874 return QTAILQ_FIRST(&graph_bdrv_states);
2876 return QTAILQ_NEXT(bs, node_list);
2879 BlockDriverState *bdrv_next(BlockDriverState *bs)
2882 return QTAILQ_FIRST(&bdrv_states);
2884 return QTAILQ_NEXT(bs, device_list);
2887 const char *bdrv_get_node_name(const BlockDriverState *bs)
2889 return bs->node_name;
2892 /* TODO check what callers really want: bs->node_name or blk_name() */
2893 const char *bdrv_get_device_name(const BlockDriverState *bs)
2895 return bs->blk ? blk_name(bs->blk) : "";
2898 /* This can be used to identify nodes that might not have a device
2899 * name associated. Since node and device names live in the same
2900 * namespace, the result is unambiguous. The exception is if both are
2901 * absent, then this returns an empty (non-null) string. */
2902 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2904 return bs->blk ? blk_name(bs->blk) : bs->node_name;
2907 int bdrv_get_flags(BlockDriverState *bs)
2909 return bs->open_flags;
2912 int bdrv_has_zero_init_1(BlockDriverState *bs)
2917 int bdrv_has_zero_init(BlockDriverState *bs)
2921 /* If BS is a copy on write image, it is initialized to
2922 the contents of the base image, which may not be zeroes. */
2923 if (bs->backing_hd) {
2926 if (bs->drv->bdrv_has_zero_init) {
2927 return bs->drv->bdrv_has_zero_init(bs);
2934 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2936 BlockDriverInfo bdi;
2938 if (bs->backing_hd) {
2942 if (bdrv_get_info(bs, &bdi) == 0) {
2943 return bdi.unallocated_blocks_are_zero;
2949 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2951 BlockDriverInfo bdi;
2953 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2957 if (bdrv_get_info(bs, &bdi) == 0) {
2958 return bdi.can_write_zeroes_with_unmap;
2964 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2966 if (bs->backing_hd && bs->backing_hd->encrypted)
2967 return bs->backing_file;
2968 else if (bs->encrypted)
2969 return bs->filename;
2974 void bdrv_get_backing_filename(BlockDriverState *bs,
2975 char *filename, int filename_size)
2977 pstrcpy(filename, filename_size, bs->backing_file);
2980 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2982 BlockDriver *drv = bs->drv;
2985 if (!drv->bdrv_get_info)
2987 memset(bdi, 0, sizeof(*bdi));
2988 return drv->bdrv_get_info(bs, bdi);
2991 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2993 BlockDriver *drv = bs->drv;
2994 if (drv && drv->bdrv_get_specific_info) {
2995 return drv->bdrv_get_specific_info(bs);
3000 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
3002 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3006 bs->drv->bdrv_debug_event(bs, event);
3009 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3012 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3013 bs = bs->file ? bs->file->bs : NULL;
3016 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3017 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3023 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3025 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3026 bs = bs->file ? bs->file->bs : NULL;
3029 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3030 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3036 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3038 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3039 bs = bs->file ? bs->file->bs : NULL;
3042 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3043 return bs->drv->bdrv_debug_resume(bs, tag);
3049 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3051 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3052 bs = bs->file ? bs->file->bs : NULL;
3055 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3056 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3062 int bdrv_is_snapshot(BlockDriverState *bs)
3064 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3067 /* backing_file can either be relative, or absolute, or a protocol. If it is
3068 * relative, it must be relative to the chain. So, passing in bs->filename
3069 * from a BDS as backing_file should not be done, as that may be relative to
3070 * the CWD rather than the chain. */
3071 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3072 const char *backing_file)
3074 char *filename_full = NULL;
3075 char *backing_file_full = NULL;
3076 char *filename_tmp = NULL;
3077 int is_protocol = 0;
3078 BlockDriverState *curr_bs = NULL;
3079 BlockDriverState *retval = NULL;
3081 if (!bs || !bs->drv || !backing_file) {
3085 filename_full = g_malloc(PATH_MAX);
3086 backing_file_full = g_malloc(PATH_MAX);
3087 filename_tmp = g_malloc(PATH_MAX);
3089 is_protocol = path_has_protocol(backing_file);
3091 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
3093 /* If either of the filename paths is actually a protocol, then
3094 * compare unmodified paths; otherwise make paths relative */
3095 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3096 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3097 retval = curr_bs->backing_hd;
3101 /* If not an absolute filename path, make it relative to the current
3102 * image's filename path */
3103 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3106 /* We are going to compare absolute pathnames */
3107 if (!realpath(filename_tmp, filename_full)) {
3111 /* We need to make sure the backing filename we are comparing against
3112 * is relative to the current image filename (or absolute) */
3113 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3114 curr_bs->backing_file);
3116 if (!realpath(filename_tmp, backing_file_full)) {
3120 if (strcmp(backing_file_full, filename_full) == 0) {
3121 retval = curr_bs->backing_hd;
3127 g_free(filename_full);
3128 g_free(backing_file_full);
3129 g_free(filename_tmp);
3133 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3139 if (!bs->backing_hd) {
3143 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
3146 void bdrv_init(void)
3148 module_call_init(MODULE_INIT_BLOCK);
3151 void bdrv_init_with_whitelist(void)
3153 use_bdrv_whitelist = 1;
3157 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3159 Error *local_err = NULL;
3166 if (!(bs->open_flags & BDRV_O_INCOMING)) {
3169 bs->open_flags &= ~BDRV_O_INCOMING;
3171 if (bs->drv->bdrv_invalidate_cache) {
3172 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3173 } else if (bs->file) {
3174 bdrv_invalidate_cache(bs->file->bs, &local_err);
3177 error_propagate(errp, local_err);
3181 ret = refresh_total_sectors(bs, bs->total_sectors);
3183 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3188 void bdrv_invalidate_cache_all(Error **errp)
3190 BlockDriverState *bs;
3191 Error *local_err = NULL;
3193 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3194 AioContext *aio_context = bdrv_get_aio_context(bs);
3196 aio_context_acquire(aio_context);
3197 bdrv_invalidate_cache(bs, &local_err);
3198 aio_context_release(aio_context);
3200 error_propagate(errp, local_err);
3206 /**************************************************************/
3207 /* removable device support */
3210 * Return TRUE if the media is present
3212 int bdrv_is_inserted(BlockDriverState *bs)
3214 BlockDriver *drv = bs->drv;
3218 if (!drv->bdrv_is_inserted)
3220 return drv->bdrv_is_inserted(bs);
3224 * Return whether the media changed since the last call to this
3225 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3227 int bdrv_media_changed(BlockDriverState *bs)
3229 BlockDriver *drv = bs->drv;
3231 if (drv && drv->bdrv_media_changed) {
3232 return drv->bdrv_media_changed(bs);
3238 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3240 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3242 BlockDriver *drv = bs->drv;
3243 const char *device_name;
3245 if (drv && drv->bdrv_eject) {
3246 drv->bdrv_eject(bs, eject_flag);
3249 device_name = bdrv_get_device_name(bs);
3250 if (device_name[0] != '\0') {
3251 qapi_event_send_device_tray_moved(device_name,
3252 eject_flag, &error_abort);
3257 * Lock or unlock the media (if it is locked, the user won't be able
3258 * to eject it manually).
3260 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3262 BlockDriver *drv = bs->drv;
3264 trace_bdrv_lock_medium(bs, locked);
3266 if (drv && drv->bdrv_lock_medium) {
3267 drv->bdrv_lock_medium(bs, locked);
3271 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3273 bs->guest_block_size = align;
3276 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3278 BdrvDirtyBitmap *bm;
3281 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3282 if (bm->name && !strcmp(name, bm->name)) {
3289 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3291 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3292 g_free(bitmap->name);
3293 bitmap->name = NULL;
3296 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3297 uint32_t granularity,
3301 int64_t bitmap_size;
3302 BdrvDirtyBitmap *bitmap;
3303 uint32_t sector_granularity;
3305 assert((granularity & (granularity - 1)) == 0);
3307 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3308 error_setg(errp, "Bitmap already exists: %s", name);
3311 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3312 assert(sector_granularity);
3313 bitmap_size = bdrv_nb_sectors(bs);
3314 if (bitmap_size < 0) {
3315 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3316 errno = -bitmap_size;
3319 bitmap = g_new0(BdrvDirtyBitmap, 1);
3320 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3321 bitmap->size = bitmap_size;
3322 bitmap->name = g_strdup(name);
3323 bitmap->disabled = false;
3324 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3328 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3330 return bitmap->successor;
3333 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3335 return !(bitmap->disabled || bitmap->successor);
3338 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3340 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3341 return DIRTY_BITMAP_STATUS_FROZEN;
3342 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3343 return DIRTY_BITMAP_STATUS_DISABLED;
3345 return DIRTY_BITMAP_STATUS_ACTIVE;
3350 * Create a successor bitmap destined to replace this bitmap after an operation.
3351 * Requires that the bitmap is not frozen and has no successor.
3353 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3354 BdrvDirtyBitmap *bitmap, Error **errp)
3356 uint64_t granularity;
3357 BdrvDirtyBitmap *child;
3359 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3360 error_setg(errp, "Cannot create a successor for a bitmap that is "
3361 "currently frozen");
3364 assert(!bitmap->successor);
3366 /* Create an anonymous successor */
3367 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3368 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3373 /* Successor will be on or off based on our current state. */
3374 child->disabled = bitmap->disabled;
3376 /* Install the successor and freeze the parent */
3377 bitmap->successor = child;
3382 * For a bitmap with a successor, yield our name to the successor,
3383 * delete the old bitmap, and return a handle to the new bitmap.
3385 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3386 BdrvDirtyBitmap *bitmap,
3390 BdrvDirtyBitmap *successor = bitmap->successor;
3392 if (successor == NULL) {
3393 error_setg(errp, "Cannot relinquish control if "
3394 "there's no successor present");
3398 name = bitmap->name;
3399 bitmap->name = NULL;
3400 successor->name = name;
3401 bitmap->successor = NULL;
3402 bdrv_release_dirty_bitmap(bs, bitmap);
3408 * In cases of failure where we can no longer safely delete the parent,
3409 * we may wish to re-join the parent and child/successor.
3410 * The merged parent will be un-frozen, but not explicitly re-enabled.
3412 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3413 BdrvDirtyBitmap *parent,
3416 BdrvDirtyBitmap *successor = parent->successor;
3419 error_setg(errp, "Cannot reclaim a successor when none is present");
3423 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3424 error_setg(errp, "Merging of parent and successor bitmap failed");
3427 bdrv_release_dirty_bitmap(bs, successor);
3428 parent->successor = NULL;
3434 * Truncates _all_ bitmaps attached to a BDS.
3436 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3438 BdrvDirtyBitmap *bitmap;
3439 uint64_t size = bdrv_nb_sectors(bs);
3441 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3442 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3443 hbitmap_truncate(bitmap->bitmap, size);
3444 bitmap->size = size;
3448 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3450 BdrvDirtyBitmap *bm, *next;
3451 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3453 assert(!bdrv_dirty_bitmap_frozen(bm));
3454 QLIST_REMOVE(bitmap, list);
3455 hbitmap_free(bitmap->bitmap);
3456 g_free(bitmap->name);
3463 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3465 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3466 bitmap->disabled = true;
3469 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3471 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3472 bitmap->disabled = false;
3475 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3477 BdrvDirtyBitmap *bm;
3478 BlockDirtyInfoList *list = NULL;
3479 BlockDirtyInfoList **plist = &list;
3481 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3482 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3483 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3484 info->count = bdrv_get_dirty_count(bm);
3485 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3486 info->has_name = !!bm->name;
3487 info->name = g_strdup(bm->name);
3488 info->status = bdrv_dirty_bitmap_status(bm);
3489 entry->value = info;
3491 plist = &entry->next;
3497 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3500 return hbitmap_get(bitmap->bitmap, sector);
3507 * Chooses a default granularity based on the existing cluster size,
3508 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3509 * is no cluster size information available.
3511 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3513 BlockDriverInfo bdi;
3514 uint32_t granularity;
3516 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3517 granularity = MAX(4096, bdi.cluster_size);
3518 granularity = MIN(65536, granularity);
3520 granularity = 65536;
3526 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3528 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3531 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3533 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3536 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3537 int64_t cur_sector, int nr_sectors)
3539 assert(bdrv_dirty_bitmap_enabled(bitmap));
3540 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3543 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3544 int64_t cur_sector, int nr_sectors)
3546 assert(bdrv_dirty_bitmap_enabled(bitmap));
3547 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3550 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3552 assert(bdrv_dirty_bitmap_enabled(bitmap));
3553 hbitmap_reset_all(bitmap->bitmap);
3556 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3559 BdrvDirtyBitmap *bitmap;
3560 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3561 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3564 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3569 * Advance an HBitmapIter to an arbitrary offset.
3571 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3574 hbitmap_iter_init(hbi, hbi->hb, offset);
3577 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3579 return hbitmap_count(bitmap->bitmap);
3582 /* Get a reference to bs */
3583 void bdrv_ref(BlockDriverState *bs)
3588 /* Release a previously grabbed reference to bs.
3589 * If after releasing, reference count is zero, the BlockDriverState is
3591 void bdrv_unref(BlockDriverState *bs)
3596 assert(bs->refcnt > 0);
3597 if (--bs->refcnt == 0) {
3602 struct BdrvOpBlocker {
3604 QLIST_ENTRY(BdrvOpBlocker) list;
3607 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3609 BdrvOpBlocker *blocker;
3610 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3611 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3612 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3614 error_setg(errp, "Node '%s' is busy: %s",
3615 bdrv_get_device_or_node_name(bs),
3616 error_get_pretty(blocker->reason));
3623 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3625 BdrvOpBlocker *blocker;
3626 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3628 blocker = g_new0(BdrvOpBlocker, 1);
3629 blocker->reason = reason;
3630 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3633 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3635 BdrvOpBlocker *blocker, *next;
3636 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3637 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3638 if (blocker->reason == reason) {
3639 QLIST_REMOVE(blocker, list);
3645 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3648 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3649 bdrv_op_block(bs, i, reason);
3653 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3656 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3657 bdrv_op_unblock(bs, i, reason);
3661 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3665 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3666 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3673 void bdrv_iostatus_enable(BlockDriverState *bs)
3675 bs->iostatus_enabled = true;
3676 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3679 /* The I/O status is only enabled if the drive explicitly
3680 * enables it _and_ the VM is configured to stop on errors */
3681 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3683 return (bs->iostatus_enabled &&
3684 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3685 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
3686 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3689 void bdrv_iostatus_disable(BlockDriverState *bs)
3691 bs->iostatus_enabled = false;
3694 void bdrv_iostatus_reset(BlockDriverState *bs)
3696 if (bdrv_iostatus_is_enabled(bs)) {
3697 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3699 block_job_iostatus_reset(bs->job);
3704 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3706 assert(bdrv_iostatus_is_enabled(bs));
3707 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3708 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3709 BLOCK_DEVICE_IO_STATUS_FAILED;
3713 void bdrv_img_create(const char *filename, const char *fmt,
3714 const char *base_filename, const char *base_fmt,
3715 char *options, uint64_t img_size, int flags,
3716 Error **errp, bool quiet)
3718 QemuOptsList *create_opts = NULL;
3719 QemuOpts *opts = NULL;
3720 const char *backing_fmt, *backing_file;
3722 BlockDriver *drv, *proto_drv;
3723 Error *local_err = NULL;
3726 /* Find driver and parse its options */
3727 drv = bdrv_find_format(fmt);
3729 error_setg(errp, "Unknown file format '%s'", fmt);
3733 proto_drv = bdrv_find_protocol(filename, true, errp);
3738 if (!drv->create_opts) {
3739 error_setg(errp, "Format driver '%s' does not support image creation",
3744 if (!proto_drv->create_opts) {
3745 error_setg(errp, "Protocol driver '%s' does not support image creation",
3746 proto_drv->format_name);
3750 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3751 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3753 /* Create parameter list with default values */
3754 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3755 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3757 /* Parse -o options */
3759 qemu_opts_do_parse(opts, options, NULL, &local_err);
3761 error_report_err(local_err);
3763 error_setg(errp, "Invalid options for file format '%s'", fmt);
3768 if (base_filename) {
3769 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3771 error_setg(errp, "Backing file not supported for file format '%s'",
3778 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3780 error_setg(errp, "Backing file format not supported for file "
3781 "format '%s'", fmt);
3786 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3788 if (!strcmp(filename, backing_file)) {
3789 error_setg(errp, "Error: Trying to create an image with the "
3790 "same filename as the backing file");
3795 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3797 // The size for the image must always be specified, with one exception:
3798 // If we are using a backing file, we can obtain the size from there
3799 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3802 BlockDriverState *bs;
3803 char *full_backing = g_new0(char, PATH_MAX);
3806 QDict *backing_options = NULL;
3808 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3809 full_backing, PATH_MAX,
3812 g_free(full_backing);
3816 /* backing files always opened read-only */
3818 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3821 backing_options = qdict_new();
3822 qdict_put(backing_options, "driver",
3823 qstring_from_str(backing_fmt));
3827 ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3828 back_flags, &local_err);
3829 g_free(full_backing);
3833 size = bdrv_getlength(bs);
3835 error_setg_errno(errp, -size, "Could not get size of '%s'",
3841 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3845 error_setg(errp, "Image creation needs a size parameter");
3851 printf("Formatting '%s', fmt=%s ", filename, fmt);
3852 qemu_opts_print(opts, " ");
3856 ret = bdrv_create(drv, filename, opts, &local_err);
3858 if (ret == -EFBIG) {
3859 /* This is generally a better message than whatever the driver would
3860 * deliver (especially because of the cluster_size_hint), since that
3861 * is most probably not much different from "image too large". */
3862 const char *cluster_size_hint = "";
3863 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3864 cluster_size_hint = " (try using a larger cluster size)";
3866 error_setg(errp, "The image size is too large for file format '%s'"
3867 "%s", fmt, cluster_size_hint);
3868 error_free(local_err);
3873 qemu_opts_del(opts);
3874 qemu_opts_free(create_opts);
3876 error_propagate(errp, local_err);
3880 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3882 return bs->aio_context;
3885 void bdrv_detach_aio_context(BlockDriverState *bs)
3887 BdrvAioNotifier *baf;
3893 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3894 baf->detach_aio_context(baf->opaque);
3897 if (bs->io_limits_enabled) {
3898 throttle_timers_detach_aio_context(&bs->throttle_timers);
3900 if (bs->drv->bdrv_detach_aio_context) {
3901 bs->drv->bdrv_detach_aio_context(bs);
3904 bdrv_detach_aio_context(bs->file->bs);
3906 if (bs->backing_hd) {
3907 bdrv_detach_aio_context(bs->backing_hd);
3910 bs->aio_context = NULL;
3913 void bdrv_attach_aio_context(BlockDriverState *bs,
3914 AioContext *new_context)
3916 BdrvAioNotifier *ban;
3922 bs->aio_context = new_context;
3924 if (bs->backing_hd) {
3925 bdrv_attach_aio_context(bs->backing_hd, new_context);
3928 bdrv_attach_aio_context(bs->file->bs, new_context);
3930 if (bs->drv->bdrv_attach_aio_context) {
3931 bs->drv->bdrv_attach_aio_context(bs, new_context);
3933 if (bs->io_limits_enabled) {
3934 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3937 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3938 ban->attached_aio_context(new_context, ban->opaque);
3942 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3944 bdrv_drain(bs); /* ensure there are no in-flight requests */
3946 bdrv_detach_aio_context(bs);
3948 /* This function executes in the old AioContext so acquire the new one in
3949 * case it runs in a different thread.
3951 aio_context_acquire(new_context);
3952 bdrv_attach_aio_context(bs, new_context);
3953 aio_context_release(new_context);
3956 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3957 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3958 void (*detach_aio_context)(void *opaque), void *opaque)
3960 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3961 *ban = (BdrvAioNotifier){
3962 .attached_aio_context = attached_aio_context,
3963 .detach_aio_context = detach_aio_context,
3967 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3970 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3971 void (*attached_aio_context)(AioContext *,
3973 void (*detach_aio_context)(void *),
3976 BdrvAioNotifier *ban, *ban_next;
3978 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3979 if (ban->attached_aio_context == attached_aio_context &&
3980 ban->detach_aio_context == detach_aio_context &&
3981 ban->opaque == opaque)
3983 QLIST_REMOVE(ban, list);
3993 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3994 BlockDriverAmendStatusCB *status_cb)
3996 if (!bs->drv->bdrv_amend_options) {
3999 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
4002 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4003 * of block filter and by bdrv_is_first_non_filter.
4004 * It is used to test if the given bs is the candidate or recurse more in the
4007 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4008 BlockDriverState *candidate)
4010 /* return false if basic checks fails */
4011 if (!bs || !bs->drv) {
4015 /* the code reached a non block filter driver -> check if the bs is
4016 * the same as the candidate. It's the recursion termination condition.
4018 if (!bs->drv->is_filter) {
4019 return bs == candidate;
4021 /* Down this path the driver is a block filter driver */
4023 /* If the block filter recursion method is defined use it to recurse down
4026 if (bs->drv->bdrv_recurse_is_first_non_filter) {
4027 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4030 /* the driver is a block filter but don't allow to recurse -> return false
4035 /* This function checks if the candidate is the first non filter bs down it's
4036 * bs chain. Since we don't have pointers to parents it explore all bs chains
4037 * from the top. Some filters can choose not to pass down the recursion.
4039 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4041 BlockDriverState *bs;
4043 /* walk down the bs forest recursively */
4044 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4047 /* try to recurse in this top level bs */
4048 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4050 /* candidate is the first non filter */
4059 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
4060 const char *node_name, Error **errp)
4062 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4063 AioContext *aio_context;
4065 if (!to_replace_bs) {
4066 error_setg(errp, "Node name '%s' not found", node_name);
4070 aio_context = bdrv_get_aio_context(to_replace_bs);
4071 aio_context_acquire(aio_context);
4073 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4074 to_replace_bs = NULL;
4078 /* We don't want arbitrary node of the BDS chain to be replaced only the top
4079 * most non filter in order to prevent data corruption.
4080 * Another benefit is that this tests exclude backing files which are
4081 * blocked by the backing blockers.
4083 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4084 error_setg(errp, "Only top most non filter can be replaced");
4085 to_replace_bs = NULL;
4090 aio_context_release(aio_context);
4091 return to_replace_bs;
4094 static bool append_open_options(QDict *d, BlockDriverState *bs)
4096 const QDictEntry *entry;
4097 bool found_any = false;
4099 for (entry = qdict_first(bs->options); entry;
4100 entry = qdict_next(bs->options, entry))
4102 /* Only take options for this level and exclude all non-driver-specific
4104 if (!strchr(qdict_entry_key(entry), '.') &&
4105 strcmp(qdict_entry_key(entry), "node-name"))
4107 qobject_incref(qdict_entry_value(entry));
4108 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4116 /* Updates the following BDS fields:
4117 * - exact_filename: A filename which may be used for opening a block device
4118 * which (mostly) equals the given BDS (even without any
4119 * other options; so reading and writing must return the same
4120 * results, but caching etc. may be different)
4121 * - full_open_options: Options which, when given when opening a block device
4122 * (without a filename), result in a BDS (mostly)
4123 * equalling the given one
4124 * - filename: If exact_filename is set, it is copied here. Otherwise,
4125 * full_open_options is converted to a JSON object, prefixed with
4126 * "json:" (for use through the JSON pseudo protocol) and put here.
4128 void bdrv_refresh_filename(BlockDriverState *bs)
4130 BlockDriver *drv = bs->drv;
4137 /* This BDS's file name will most probably depend on its file's name, so
4138 * refresh that first */
4140 bdrv_refresh_filename(bs->file->bs);
4143 if (drv->bdrv_refresh_filename) {
4144 /* Obsolete information is of no use here, so drop the old file name
4145 * information before refreshing it */
4146 bs->exact_filename[0] = '\0';
4147 if (bs->full_open_options) {
4148 QDECREF(bs->full_open_options);
4149 bs->full_open_options = NULL;
4152 drv->bdrv_refresh_filename(bs);
4153 } else if (bs->file) {
4154 /* Try to reconstruct valid information from the underlying file */
4155 bool has_open_options;
4157 bs->exact_filename[0] = '\0';
4158 if (bs->full_open_options) {
4159 QDECREF(bs->full_open_options);
4160 bs->full_open_options = NULL;
4164 has_open_options = append_open_options(opts, bs);
4166 /* If no specific options have been given for this BDS, the filename of
4167 * the underlying file should suffice for this one as well */
4168 if (bs->file->bs->exact_filename[0] && !has_open_options) {
4169 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4171 /* Reconstructing the full options QDict is simple for most format block
4172 * drivers, as long as the full options are known for the underlying
4173 * file BDS. The full options QDict of that file BDS should somehow
4174 * contain a representation of the filename, therefore the following
4175 * suffices without querying the (exact_)filename of this BDS. */
4176 if (bs->file->bs->full_open_options) {
4177 qdict_put_obj(opts, "driver",
4178 QOBJECT(qstring_from_str(drv->format_name)));
4179 QINCREF(bs->file->bs->full_open_options);
4180 qdict_put_obj(opts, "file",
4181 QOBJECT(bs->file->bs->full_open_options));
4183 bs->full_open_options = opts;
4187 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4188 /* There is no underlying file BDS (at least referenced by BDS.file),
4189 * so the full options QDict should be equal to the options given
4190 * specifically for this block device when it was opened (plus the
4191 * driver specification).
4192 * Because those options don't change, there is no need to update
4193 * full_open_options when it's already set. */
4196 append_open_options(opts, bs);
4197 qdict_put_obj(opts, "driver",
4198 QOBJECT(qstring_from_str(drv->format_name)));
4200 if (bs->exact_filename[0]) {
4201 /* This may not work for all block protocol drivers (some may
4202 * require this filename to be parsed), but we have to find some
4203 * default solution here, so just include it. If some block driver
4204 * does not support pure options without any filename at all or
4205 * needs some special format of the options QDict, it needs to
4206 * implement the driver-specific bdrv_refresh_filename() function.
4208 qdict_put_obj(opts, "filename",
4209 QOBJECT(qstring_from_str(bs->exact_filename)));
4212 bs->full_open_options = opts;
4215 if (bs->exact_filename[0]) {
4216 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4217 } else if (bs->full_open_options) {
4218 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4219 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4220 qstring_get_str(json));
4225 /* This accessor function purpose is to allow the device models to access the
4226 * BlockAcctStats structure embedded inside a BlockDriverState without being
4227 * aware of the BlockDriverState structure layout.
4228 * It will go away when the BlockAcctStats structure will be moved inside
4229 * the device models.
4231 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)