2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qjson.h"
33 #include "sysemu/block-backend.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/notify.h"
36 #include "block/coroutine.h"
37 #include "block/qapi.h"
38 #include "qmp-commands.h"
39 #include "qemu/timer.h"
40 #include "qapi-event.h"
41 #include "block/throttle-groups.h"
44 #include <sys/types.h>
46 #include <sys/ioctl.h>
47 #include <sys/queue.h>
58 * A BdrvDirtyBitmap can be in three possible states:
59 * (1) successor is NULL and disabled is false: full r/w mode
60 * (2) successor is NULL and disabled is true: read only mode ("disabled")
61 * (3) successor is set: frozen mode.
62 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
63 * or enabled. A frozen bitmap can only abdicate() or reclaim().
/*
 * Bookkeeping for one dirty bitmap: the HBitmap holding the dirty bits, an
 * optional successor bitmap, an optional name, the size in sectors, the
 * "disabled" flag and a QLIST linkage (presumably into bs->dirty_bitmaps,
 * which bdrv_new() initialises -- confirm).
 * NOTE(review): the closing brace of this struct is missing from this excerpt.
 */
65 struct BdrvDirtyBitmap {
66 HBitmap *bitmap; /* Dirty sector bitmap implementation */
67 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
68 char *name; /* Optional non-empty unique ID */
69 int64_t size; /* Size of the bitmap (Number of sectors) */
70 bool disabled; /* Bitmap is read-only */
71 QLIST_ENTRY(BdrvDirtyBitmap) list;
/*
 * File-scope state and forward declarations:
 *  - NOT_DONE: sentinel that bdrv_create() polls on while its coroutine runs.
 *  - bdrv_states: tail queue that bdrv_new_root() appends new roots to.
 *  - graph_bdrv_states: tail queue that bdrv_assign_node_name() appends
 *    named nodes to.
 *  - bdrv_drivers: list that bdrv_register() inserts drivers into.
 *  - prototypes for bdrv_open_inherit() and bdrv_dirty_bitmap_truncate().
 *  - use_bdrv_whitelist: checked in bdrv_open_common() before the
 *    whitelist lookup.
 */
74 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
79 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
80 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
82 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
83 QLIST_HEAD_INITIALIZER(bdrv_drivers);
85 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
86 const char *reference, QDict *options, int flags,
87 BlockDriverState *parent,
88 const BdrvChildRole *child_role,
89 BlockDriver *drv, Error **errp);
91 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
92 /* If non-zero, use only whitelisted block drivers */
93 static int use_bdrv_whitelist;
/*
 * Tests whether @filename starts with an ASCII letter (a-z or A-Z).
 * NOTE(review): the expression continues after the trailing "&&", but the
 * final operand is missing from this excerpt -- presumably a check of
 * filename[1]; confirm against the full file.
 */
96 static int is_windows_drive_prefix(const char *filename)
98 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
99 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
/*
 * Recognises Windows drive specifications. Two tests are visible: one built
 * on is_windows_drive_prefix() (its second operand is missing from this
 * excerpt) and one that accepts names starting with "\\.\" or "//./".
 * The return statements are not visible here.
 */
103 int is_windows_drive(const char *filename)
105 if (is_windows_drive_prefix(filename) &&
108 if (strstart(filename, "\\\\.\\", NULL) ||
109 strstart(filename, "//./", NULL))
/*
 * Returns bs->bl.opt_mem_alignment. When @bs is NULL or has no driver
 * attached, falls back to MAX(4096, getpagesize()).
 */
115 size_t bdrv_opt_mem_align(BlockDriverState *bs)
117 if (!bs || !bs->drv) {
118 /* page size or 4k (hdd sector size) should be on the safe side */
119 return MAX(4096, getpagesize());
122 return bs->bl.opt_mem_alignment;
/*
 * Returns bs->bl.min_mem_alignment. When @bs is NULL or has no driver
 * attached, falls back to MAX(4096, getpagesize()).
 */
125 size_t bdrv_min_mem_align(BlockDriverState *bs)
127 if (!bs || !bs->drv) {
128 /* page size or 4k (hdd sector size) should be on the safe side */
129 return MAX(4096, getpagesize());
132 return bs->bl.min_mem_alignment;
135 /* check if the path starts with "<protocol>:" */
/*
 * Visible logic: paths accepted by is_windows_drive() or
 * is_windows_drive_prefix() are special-cased (the value returned for them
 * is not visible here). Otherwise p is advanced with strcspn() to the first
 * ':' or '/' (one variant also stops at a backslash).
 * NOTE(review): the two strcspn() variants presumably belong to a platform
 * conditional whose directives, like the final return, are missing from this
 * excerpt -- confirm against the full file.
 */
136 int path_has_protocol(const char *path)
141 if (is_windows_drive(path) ||
142 is_windows_drive_prefix(path)) {
145 p = path + strcspn(path, ":/\\");
147 p = path + strcspn(path, ":/");
/*
 * Reports whether @path is absolute. Two return variants are visible: one
 * accepts a leading '/' or backslash, the other only a leading '/'. Paths
 * accepted by is_windows_drive() or is_windows_drive_prefix() are
 * special-cased first (the value returned for them is not visible).
 * NOTE(review): the variants presumably sit in a platform conditional that
 * is missing from this excerpt -- confirm.
 */
153 int path_is_absolute(const char *path)
156 /* specific case for names like: "\\.\d:" */
157 if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
160 return (*path == '/' || *path == '\\');
162 return (*path == '/');
166 /* If filename is absolute, just copy it to dest. Otherwise, build a
167 path to it by treating it as relative to base_path. URLs are supported. */
/*
 * Writes into @dest (capacity @dest_size) either a copy of @filename, when
 * path_is_absolute() accepts it, or a prefix of @base_path followed by
 * @filename. The visible lines look for ':' and for the last '/' and the
 * last backslash in @base_path, clamp the prefix length to dest_size - 1,
 * memcpy() the prefix and append @filename with pstrcat().
 * NOTE(review): how p, p1 and p2 are combined into len, and the NUL
 * termination after memcpy(), are not visible in this excerpt.
 */
169 void path_combine(char *dest, int dest_size,
170 const char *base_path,
171 const char *filename)
178 if (path_is_absolute(filename)) {
179 pstrcpy(dest, dest_size, filename);
181 p = strchr(base_path, ':');
186 p1 = strrchr(base_path, '/');
190 p2 = strrchr(base_path, '\\');
202 if (len > dest_size - 1)
204 memcpy(dest, base_path, len);
206 pstrcat(dest, dest_size, filename);
/*
 * Resolves a backing file name into @dest (capacity @sz):
 *  - an empty, protocol-prefixed or absolute @backing is copied as is;
 *  - otherwise, if @backed is empty or starts with "json:", an error is set
 *    because a relative name cannot be resolved against it;
 *  - otherwise @backing is combined with @backed through path_combine().
 * NOTE(review): part of the parameter list and the argument of the error
 * message are missing from this excerpt.
 */
210 void bdrv_get_full_backing_filename_from_filename(const char *backed,
212 char *dest, size_t sz,
215 if (backing[0] == '\0' || path_has_protocol(backing) ||
216 path_is_absolute(backing))
218 pstrcpy(dest, sz, backing);
219 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
220 error_setg(errp, "Cannot use relative backing file names for '%s'",
223 path_combine(dest, sz, backed, backing);
/*
 * Convenience wrapper: picks bs->exact_filename when it is non-empty,
 * otherwise bs->filename, and resolves bs->backing_file against it with
 * bdrv_get_full_backing_filename_from_filename().
 * NOTE(review): the trailing arguments of the call are not visible here.
 */
227 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
230 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
232 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
/*
 * Registers a block driver: lets bdrv_setup_io_funcs() prepare it, then
 * inserts it at the head of the bdrv_drivers list.
 */
236 void bdrv_register(BlockDriver *bdrv)
238 bdrv_setup_io_funcs(bdrv);
240 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
/*
 * Creates a BlockDriverState with bdrv_new() and appends it to the
 * bdrv_states queue. The return statement is not visible in this excerpt.
 */
243 BlockDriverState *bdrv_new_root(void)
245 BlockDriverState *bs = bdrv_new();
247 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
/*
 * Allocates a zero-filled BlockDriverState and initialises its embedded
 * containers: the dirty-bitmap list, one op-blocker list per
 * BLOCK_OP_TYPE, the close and before-write notifier lists and the two
 * throttled-request coroutine queues. I/O status tracking is disabled and
 * the node is bound to the AioContext from qemu_get_aio_context().
 * NOTE(review): further initialisation and the return statement are not
 * visible in this excerpt.
 */
251 BlockDriverState *bdrv_new(void)
253 BlockDriverState *bs;
256 bs = g_new0(BlockDriverState, 1);
257 QLIST_INIT(&bs->dirty_bitmaps);
258 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
259 QLIST_INIT(&bs->op_blockers[i]);
261 bdrv_iostatus_disable(bs);
262 notifier_list_init(&bs->close_notifiers);
263 notifier_with_return_list_init(&bs->before_write_notifiers);
264 qemu_co_queue_init(&bs->throttled_reqs[0]);
265 qemu_co_queue_init(&bs->throttled_reqs[1]);
267 bs->aio_context = qemu_get_aio_context();
/* Adds @notify to the close_notifiers list of @bs. */
272 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
274 notifier_list_add(&bs->close_notifiers, notify);
/*
 * Walks bdrv_drivers looking for a driver whose format_name equals
 * @format_name (exact strcmp() match). The return statements are not
 * visible in this excerpt.
 */
277 BlockDriver *bdrv_find_format(const char *format_name)
280 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
281 if (!strcmp(drv1->format_name, format_name)) {
/*
 * Checks @drv against two NULL-terminated name tables filled from
 * CONFIG_BDRV_RW_WHITELIST and CONFIG_BDRV_RO_WHITELIST. When both tables
 * are empty every driver is accepted (returns 1). Otherwise
 * drv->format_name is compared against the read-write table first and the
 * read-only table second.
 * NOTE(review): the values returned on a match, the final return and the
 * way @read_only gates the second loop are not visible in this excerpt.
 */
288 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
290 static const char *whitelist_rw[] = {
291 CONFIG_BDRV_RW_WHITELIST
293 static const char *whitelist_ro[] = {
294 CONFIG_BDRV_RO_WHITELIST
298 if (!whitelist_rw[0] && !whitelist_ro[0]) {
299 return 1; /* no whitelist, anything goes */
302 for (p = whitelist_rw; *p; p++) {
303 if (!strcmp(drv->format_name, *p)) {
308 for (p = whitelist_ro; *p; p++) {
309 if (!strcmp(drv->format_name, *p)) {
/*
 * Looks up @format_name with bdrv_find_format() and returns the driver only
 * if bdrv_is_whitelisted() accepts it for the requested read_only mode;
 * returns NULL otherwise (including when no such driver exists).
 */
317 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
320 BlockDriver *drv = bdrv_find_format(format_name);
321 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
/*
 * Argument/result bundle handed to bdrv_create_co_entry(). Users in this
 * file access the members drv, filename, opts, ret and err.
 * NOTE(review): the member declarations themselves are missing from this
 * excerpt.
 */
324 typedef struct CreateCo {
/*
 * Coroutine entry point for image creation. @opaque is a CreateCo; the
 * driver's bdrv_create() callback is invoked with the bundled filename and
 * options, and any error it reports is propagated into cco->err.
 * NOTE(review): the condition guarding error_propagate() and the store of
 * the result into the CreateCo are not visible in this excerpt.
 */
332 static void coroutine_fn bdrv_create_co_entry(void *opaque)
334 Error *local_err = NULL;
337 CreateCo *cco = opaque;
340 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
342 error_propagate(&cco->err, local_err);
/*
 * Creates an image through @drv's bdrv_create() callback.
 *
 * Sets the error "Driver '%s' does not support image creation" when the
 * driver has no such callback. Otherwise the work is done by
 * bdrv_create_co_entry(): called directly when already inside a coroutine,
 * or else started as a new coroutine while this function polls the
 * AioContext from qemu_get_aio_context() until cco.ret is no longer
 * NOT_DONE. The private copy of @filename (g_strdup) is released with
 * g_free().
 * NOTE(review): the initialisation of the remaining CreateCo members, the
 * condition choosing between propagating cco.err and the generic
 * "Could not create image" message, and the return statement are not
 * visible in this excerpt.
 */
347 int bdrv_create(BlockDriver *drv, const char* filename,
348 QemuOpts *opts, Error **errp)
355 .filename = g_strdup(filename),
361 if (!drv->bdrv_create) {
362 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
367 if (qemu_in_coroutine()) {
368 /* Fast-path if already in coroutine context */
369 bdrv_create_co_entry(&cco);
371 co = qemu_coroutine_create(bdrv_create_co_entry);
372 qemu_coroutine_enter(co, &cco);
373 while (cco.ret == NOT_DONE) {
374 aio_poll(qemu_get_aio_context(), true);
381 error_propagate(errp, cco.err);
383 error_setg_errno(errp, -ret, "Could not create image");
388 g_free(cco.filename);
/*
 * Creates a file at the protocol level: resolves the driver for @filename
 * with bdrv_find_protocol() (protocol prefixes allowed) and delegates to
 * bdrv_create(), propagating its error to @errp.
 * NOTE(review): the NULL-driver check and the return statements are not
 * visible in this excerpt.
 */
392 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
395 Error *local_err = NULL;
398 drv = bdrv_find_protocol(filename, true, errp);
403 ret = bdrv_create(drv, filename, opts, &local_err);
405 error_propagate(errp, local_err);
411 * Try to get @bs's logical and physical block size.
412 * On success, store them in @bsz struct and return 0.
413 * On failure return -errno.
414 * @bs must not be empty.
/*
 * Forwards to the driver's bdrv_probe_blocksizes() callback when @bs has a
 * driver that implements it. The fallback return for the other case is not
 * visible in this excerpt.
 */
416 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
418 BlockDriver *drv = bs->drv;
420 if (drv && drv->bdrv_probe_blocksizes) {
421 return drv->bdrv_probe_blocksizes(bs, bsz);
428 * Try to get @bs's geometry (cyls, heads, sectors).
429 * On success, store them in @geo struct and return 0.
430 * On failure return -errno.
431 * @bs must not be empty.
/*
 * Forwards to the driver's bdrv_probe_geometry() callback when @bs has a
 * driver that implements it. The fallback return for the other case is not
 * visible in this excerpt.
 */
433 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
435 BlockDriver *drv = bs->drv;
437 if (drv && drv->bdrv_probe_geometry) {
438 return drv->bdrv_probe_geometry(bs, geo);
445 * Create a uniquely-named empty temporary file.
446 * Return 0 upon success, otherwise a negative errno value.
/*
 * Two implementations are visible:
 *  - one calls GetTempPath() and GetTempFileName() with the prefix "qem",
 *    asserts that @size is at least MAX_PATH and returns 0 or
 *    -GetLastError();
 *  - the other formats "<tmpdir>/vl.XXXXXX" into @filename (tmpdir read
 *    from $TMPDIR), detects truncation through the snprintf() result,
 *    creates the file with mkstemp() and closes the descriptor again.
 * NOTE(review): the preprocessor directives selecting between the two, the
 * default used when TMPDIR is unset and the error returns are missing from
 * this excerpt.
 */
448 int get_tmp_filename(char *filename, int size)
451 char temp_dir[MAX_PATH];
452 /* GetTempFileName requires that its output buffer (4th param)
453 have length MAX_PATH or greater. */
454 assert(size >= MAX_PATH);
455 return (GetTempPath(MAX_PATH, temp_dir)
456 && GetTempFileName(temp_dir, "qem", 0, filename)
457 ? 0 : -GetLastError());
461 tmpdir = getenv("TMPDIR");
465 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
468 fd = mkstemp(filename);
472 if (close(fd) != 0) {
481 * Detect host devices. By convention, /dev/cdrom[N] is always
482 * recognized as a host CDROM.
/*
 * Asks every registered driver that implements bdrv_probe_device() to score
 * @filename and tracks scores that beat the best so far (score_max starts
 * at 0, so a driver must score above 0 to be considered).
 * NOTE(review): the update of score_max/drv and the return statement are
 * not visible in this excerpt.
 */
484 static BlockDriver *find_hdev_driver(const char *filename)
486 int score_max = 0, score;
487 BlockDriver *drv = NULL, *d;
489 QLIST_FOREACH(d, &bdrv_drivers, list) {
490 if (d->bdrv_probe_device) {
491 score = d->bdrv_probe_device(filename);
492 if (score > score_max) {
/*
 * Chooses the protocol-level driver for @filename. Visible steps:
 *  1. find_hdev_driver() is consulted first, deliberately ahead of any
 *     explicit protocol prefix (see the XXX note below).
 *  2. If the name has no protocol prefix, or @allow_protocol_prefix is
 *     false, a different result is produced (not visible here).
 *  3. Otherwise the text before the first ':' is copied into the local
 *     protocol buffer, truncated to sizeof(protocol) - 1 and
 *     NUL-terminated, and bdrv_drivers is searched for a driver with that
 *     protocol_name.
 *  4. After the search loop the error "Unknown protocol '%s'" is set.
 * NOTE(review): the returns taken in steps 1-3 and the computation of len
 * are missing from this excerpt.
 */
502 BlockDriver *bdrv_find_protocol(const char *filename,
503 bool allow_protocol_prefix,
511 /* TODO Drivers without bdrv_file_open must be specified explicitly */
514 * XXX(hch): we really should not let host device detection
515 * override an explicit protocol specification, but moving this
516 * later breaks access to device names with colons in them.
517 * Thanks to the brain-dead persistent naming schemes on udev-
518 * based Linux systems those actually are quite common.
520 drv1 = find_hdev_driver(filename);
525 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
529 p = strchr(filename, ':');
532 if (len > sizeof(protocol) - 1)
533 len = sizeof(protocol) - 1;
534 memcpy(protocol, filename, len);
535 protocol[len] = '\0';
536 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
537 if (drv1->protocol_name &&
538 !strcmp(drv1->protocol_name, protocol)) {
543 error_setg(errp, "Unknown protocol '%s'", protocol);
548 * Guess image format by probing its contents.
549 * This is not a good idea when your image is raw (CVE-2008-2004), but
550 * we do it anyway for backward compatibility.
552 * @buf contains the image's first @buf_size bytes.
553 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
554 * but can be smaller if the image file is smaller)
555 * @filename is its filename.
557 * For all block drivers, call the bdrv_probe() method to get its probing score.
559 * Return the first block driver with the highest probing score.
/*
 * Calls bdrv_probe() on registered drivers with the probe buffer and file
 * name, tracking scores that beat the best so far (score_max starts at 0).
 * NOTE(review): a guard for drivers without bdrv_probe(), the update of
 * score_max/drv and the return statement are not visible in this excerpt.
 */
561 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
562 const char *filename)
564 int score_max = 0, score;
565 BlockDriver *drv = NULL, *d;
567 QLIST_FOREACH(d, &bdrv_drivers, list) {
569 score = d->bdrv_probe(buf, buf_size, filename);
570 if (score > score_max) {
/*
 * Determines the format driver for @bs by probing its contents.
 * scsi-generic devices, drives without medium and zero-length images skip
 * probing (per the comment below they get the raw driver; the assignment is
 * not visible here). Otherwise up to BLOCK_PROBE_BUF_SIZE bytes are read
 * from offset 0 with bdrv_pread() and the number of bytes actually read is
 * passed to bdrv_probe_all().
 * NOTE(review): the conditions around both error_setg calls, the store to
 * *pdrv and the return statements are missing from this excerpt.
 */
580 static int find_image_format(BlockDriverState *bs, const char *filename,
581 BlockDriver **pdrv, Error **errp)
584 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
587 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
588 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
593 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
595 error_setg_errno(errp, -ret, "Could not read image for determining its "
601 drv = bdrv_probe_all(buf, ret, filename);
603 error_setg(errp, "Could not determine image format: No compatible "
612 * Set the current 'total_sectors' value
613 * Return 0 on success, -errno on error.
/*
 * Updates bs->total_sectors. When the driver implements bdrv_getlength(),
 * @hint is replaced by the reported byte length rounded up to whole
 * sectors (DIV_ROUND_UP by BDRV_SECTOR_SIZE); otherwise @hint is stored
 * unchanged.
 * NOTE(review): the scsi-generic early exit announced by the first comment
 * and the handling of a negative length are not visible in this excerpt.
 */
615 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
617 BlockDriver *drv = bs->drv;
619 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
623 /* query actual device if possible, otherwise just trust the hint */
624 if (drv->bdrv_getlength) {
625 int64_t length = drv->bdrv_getlength(bs);
629 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
632 bs->total_sectors = hint;
637 * Set open flags for a given discard mode
639 * Return 0 on success, -1 if the discard mode was invalid.
/*
 * Translates a discard mode string into open flags. BDRV_O_UNMAP is always
 * cleared first; "off"/"ignore" leave it cleared and "on"/"unmap" set it.
 * The branch for unknown modes and the return statements are not visible
 * in this excerpt.
 */
641 int bdrv_parse_discard_flags(const char *mode, int *flags)
643 *flags &= ~BDRV_O_UNMAP;
645 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
647 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
648 *flags |= BDRV_O_UNMAP;
657 * Set open flags for a given cache mode
659 * Return 0 on success, -1 if the cache mode was invalid.
/*
 * Translates a cache mode string into open flags. All bits in
 * BDRV_O_CACHE_MASK are cleared first, then:
 *   "off" / "none"  -> BDRV_O_NOCACHE | BDRV_O_CACHE_WB
 *   "directsync"    -> BDRV_O_NOCACHE
 *   "writeback"     -> BDRV_O_CACHE_WB
 *   "unsafe"        -> BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH
 *   "writethrough"  -> no additional flag
 * The branch for unknown modes and the return statements are not visible
 * in this excerpt.
 */
661 int bdrv_parse_cache_flags(const char *mode, int *flags)
663 *flags &= ~BDRV_O_CACHE_MASK;
665 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
666 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
667 } else if (!strcmp(mode, "directsync")) {
668 *flags |= BDRV_O_NOCACHE;
669 } else if (!strcmp(mode, "writeback")) {
670 *flags |= BDRV_O_CACHE_WB;
671 } else if (!strcmp(mode, "unsafe")) {
672 *flags |= BDRV_O_CACHE_WB;
673 *flags |= BDRV_O_NO_FLUSH;
674 } else if (!strcmp(mode, "writethrough")) {
675 /* this is the default */
684 * Returns the flags that a temporary snapshot should get, based on the
685 * originally requested flags (the originally requested image will have flags
686 * like a backing file)
/* Returns @flags with BDRV_O_SNAPSHOT cleared and BDRV_O_TEMPORARY set. */
688 static int bdrv_temp_snapshot_flags(int flags)
690 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
694 * Returns the flags that bs->file should get if a protocol driver is expected,
695 * based on the given flags for the parent BDS
/*
 * Derives flags from the parent's @flags: sets BDRV_O_PROTOCOL,
 * BDRV_O_CACHE_WB and BDRV_O_UNMAP, and clears BDRV_O_SNAPSHOT,
 * BDRV_O_NO_BACKING and BDRV_O_COPY_ON_READ. Used as the inherit_flags
 * callback of child_file. The return statement is not visible in this
 * excerpt.
 */
697 static int bdrv_inherited_flags(int flags)
699 /* Enable protocol handling, disable format probing for bs->file */
700 flags |= BDRV_O_PROTOCOL;
702 /* Our block drivers take care to send flushes and respect unmap policy,
703 * so we can enable both unconditionally on lower layers. */
704 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
706 /* Clear flags that only apply to the top layer */
707 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
/*
 * Child role whose flag inheritance is bdrv_inherited_flags(); passed to
 * bdrv_open_image() when bdrv_open_inherit() opens the "file" child.
 */
712 const BdrvChildRole child_file = {
713 .inherit_flags = bdrv_inherited_flags,
717 * Returns the flags that bs->file should get if the use of formats (and not
718 * only protocols) is permitted for it, based on the given flags for the parent
/*
 * Same derivation as child_file.inherit_flags(), but with BDRV_O_PROTOCOL
 * cleared again in the result.
 */
721 static int bdrv_inherited_fmt_flags(int parent_flags)
723 int flags = child_file.inherit_flags(parent_flags);
724 return flags & ~BDRV_O_PROTOCOL;
/* Child role whose flag inheritance is bdrv_inherited_fmt_flags(). */
727 const BdrvChildRole child_format = {
728 .inherit_flags = bdrv_inherited_fmt_flags,
732 * Returns the flags that bs->backing_hd should get, based on the given flags
/*
 * Derives backing-file flags from @flags by clearing BDRV_O_RDWR,
 * BDRV_O_COPY_ON_READ, BDRV_O_SNAPSHOT and BDRV_O_TEMPORARY. The return
 * statement is not visible in this excerpt.
 */
735 static int bdrv_backing_flags(int flags)
737 /* backing files always opened read-only */
738 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
740 /* snapshot=on is handled on the top layer */
741 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
/*
 * Child role for backing files; its flag inheritance is
 * bdrv_backing_flags(). Used by bdrv_set_backing_hd() and
 * bdrv_open_backing_file().
 */
746 static const BdrvChildRole child_backing = {
747 .inherit_flags = bdrv_backing_flags,
/*
 * Computes the flags handed to the driver's open callback: starts from
 * @flags with BDRV_O_CACHE_WB set, clears BDRV_O_SNAPSHOT,
 * BDRV_O_NO_BACKING and BDRV_O_PROTOCOL, and adds BDRV_O_RDWR when
 * BDRV_O_TEMPORARY was requested. The return statement is not visible in
 * this excerpt.
 */
750 static int bdrv_open_flags(BlockDriverState *bs, int flags)
752 int open_flags = flags | BDRV_O_CACHE_WB;
755 * Clear flags that are internal to the block layer before opening the
758 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
761 * Snapshots should be writable.
763 if (flags & BDRV_O_TEMPORARY) {
764 open_flags |= BDRV_O_RDWR;
/*
 * Validates @node_name and, if acceptable, stores it in bs->node_name and
 * appends @bs to graph_bdrv_states. An error is set when the name is not
 * well-formed (id_wellformed()), clashes with a BlockBackend name
 * (blk_by_name()) or is already used by another node (bdrv_find_node()).
 * NOTE(review): the handling of a NULL @node_name and the early returns
 * after each error are not visible in this excerpt.
 */
770 static void bdrv_assign_node_name(BlockDriverState *bs,
771 const char *node_name,
778 /* Check for empty string or invalid characters */
779 if (!id_wellformed(node_name)) {
780 error_setg(errp, "Invalid node name");
784 /* takes care of avoiding namespace collisions */
785 if (blk_by_name(node_name)) {
786 error_setg(errp, "node-name=%s is conflicting with a device id",
791 /* takes care of avoiding duplicate node names */
792 if (bdrv_find_node(node_name)) {
793 error_setg(errp, "Duplicate node name");
797 /* copy node name into the bs and insert it into the graph list */
798 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
799 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
/*
 * Option list "bdrv_common", absorbed from the options QDict in
 * bdrv_open_common(). The visible entry is a string option described as
 * the node name of the block device node; bdrv_open_common() reads it
 * under the key "node-name".
 * NOTE(review): the .desc array header and the entry's .name line are
 * missing from this excerpt.
 */
802 static QemuOptsList bdrv_runtime_opts = {
803 .name = "bdrv_common",
804 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
808 .type = QEMU_OPT_STRING,
809 .help = "Node name of the block device node",
811 { /* end of list */ }
816 * Common part for opening disk images and files
818 * Removes all processed options from *options.
/*
 * Shared open path for images and protocol files. Visible steps, in order:
 *  1. Preconditions: bs->file is NULL; @options is non-NULL and is not
 *     bs->options.
 *  2. The file name comes from file->filename or from the "filename"
 *     option; a driver with bdrv_needs_filename set fails without one.
 *  3. Runtime options are absorbed from @options through
 *     bdrv_runtime_opts, and "node-name" is applied with
 *     bdrv_assign_node_name().
 *  4. Defaults are set: guest_block_size and request_alignment 512,
 *     zero_beyond_eof true; read_only is derived from bdrv_open_flags().
 *  5. With use_bdrv_whitelist set, drivers rejected by
 *     bdrv_is_whitelisted() fail with one of two messages.
 *  6. BDRV_O_COPY_ON_READ is honoured only on writable nodes; on read-only
 *     nodes it is an error.
 *  7. bs->filename and bs->exact_filename are filled, bs->opaque is
 *     allocated zeroed with drv->instance_size bytes, and
 *     enable_write_cache mirrors BDRV_O_CACHE_WB.
 *  8. The driver is opened through bdrv_file_open() (requires @file to be
 *     NULL) or bdrv_open(); failures are reported via local_err or a
 *     "Could not open ..." message.
 *  9. A deprecation warning for encrypted images is printed (its condition
 *     is not visible here).
 * 10. refresh_total_sectors() and bdrv_refresh_limits() run, then the
 *     memory and request alignments are asserted to be non-zero
 *     (request_alignment may be 0 for scsi-generic).
 * NOTE(review): most conditions, gotos/cleanup labels and return
 * statements are missing from this excerpt; the order above follows only
 * the visible lines.
 */
820 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
821 QDict *options, int flags, BlockDriver *drv, Error **errp)
824 const char *filename;
825 const char *node_name = NULL;
827 Error *local_err = NULL;
830 assert(bs->file == NULL);
831 assert(options != NULL && bs->options != options);
834 filename = file->filename;
836 filename = qdict_get_try_str(options, "filename");
839 if (drv->bdrv_needs_filename && !filename) {
840 error_setg(errp, "The '%s' block driver requires a file name",
845 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
847 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
848 qemu_opts_absorb_qdict(opts, options, &local_err);
850 error_propagate(errp, local_err);
855 node_name = qemu_opt_get(opts, "node-name");
856 bdrv_assign_node_name(bs, node_name, &local_err);
858 error_propagate(errp, local_err);
863 bs->guest_block_size = 512;
864 bs->request_alignment = 512;
865 bs->zero_beyond_eof = true;
866 open_flags = bdrv_open_flags(bs, flags);
867 bs->read_only = !(open_flags & BDRV_O_RDWR);
869 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
871 !bs->read_only && bdrv_is_whitelisted(drv, true)
872 ? "Driver '%s' can only be used for read-only devices"
873 : "Driver '%s' is not whitelisted",
879 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
880 if (flags & BDRV_O_COPY_ON_READ) {
881 if (!bs->read_only) {
882 bdrv_enable_copy_on_read(bs);
884 error_setg(errp, "Can't use copy-on-read on read-only device");
890 if (filename != NULL) {
891 pstrcpy(bs->filename, sizeof(bs->filename), filename);
893 bs->filename[0] = '\0';
895 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
898 bs->opaque = g_malloc0(drv->instance_size);
900 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
902 /* Open the image, either directly or using a protocol */
903 if (drv->bdrv_file_open) {
904 assert(file == NULL);
905 assert(!drv->bdrv_needs_filename || filename != NULL);
906 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
909 error_setg(errp, "Can't use '%s' as a block driver for the "
910 "protocol level", drv->format_name);
915 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
920 error_propagate(errp, local_err);
921 } else if (bs->filename[0]) {
922 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
924 error_setg_errno(errp, -ret, "Could not open image");
930 error_report("Encrypted images are deprecated");
931 error_printf("Support for them will be removed in a future release.\n"
932 "You can use 'qemu-img convert' to convert your image"
933 " to an unencrypted one.\n");
936 ret = refresh_total_sectors(bs, bs->total_sectors);
938 error_setg_errno(errp, -ret, "Could not refresh total sector count");
942 bdrv_refresh_limits(bs, &local_err);
944 error_propagate(errp, local_err);
949 assert(bdrv_opt_mem_align(bs) != 0);
950 assert(bdrv_min_mem_align(bs) != 0);
951 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
/*
 * Parses a "json:{...}" pseudo-filename into a flattened QDict. The "json:"
 * prefix is stripped with strstart(), the remainder is parsed with
 * qobject_from_json(), anything that is not a QDict is released and
 * rejected with "Invalid JSON object given", and the resulting dict is
 * flattened with qdict_flatten().
 * NOTE(review): the check of the strstart() result, the NULL check on the
 * parsed object and the return statements are not visible in this excerpt.
 */
966 static QDict *parse_json_filename(const char *filename, Error **errp)
968 QObject *options_obj;
972 ret = strstart(filename, "json:", &filename);
975 options_obj = qobject_from_json(filename);
977 error_setg(errp, "Could not parse the JSON options");
981 if (qobject_type(options_obj) != QTYPE_QDICT) {
982 qobject_decref(options_obj);
983 error_setg(errp, "Invalid JSON object given");
987 options = qobject_to_qdict(options_obj);
988 qdict_flatten(options);
994 * Fills in default options for opening images and converts the legacy
995 * filename/flags pair to option QDict entries.
996 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
997 * block driver has been specified explicitly.
/*
 * Normalises the legacy filename/flags pair into entries of *options.
 * Visible steps, in order:
 *  1. A "json:" filename is parsed with parse_json_filename() and merged
 *     into *options without overriding existing keys; the filename is then
 *     dropped (*pfilename becomes NULL).
 *  2. The driver named by the "driver" option (or @drv) determines whether
 *     this is a protocol-level open (tmp_drv->bdrv_file_open), and
 *     BDRV_O_PROTOCOL in *flags is set or cleared to match.
 *  3. For protocol opens with a filename, the name is stored under
 *     "filename" unless that key already exists, which is an error.
 *  4. The driver is resolved: an explicit @drv is written back as "driver"
 *     (a conflict yields "Driver specified twice"); a protocol open
 *     without a driver name uses bdrv_find_protocol(); otherwise
 *     bdrv_find_format() is used and an unknown name is an error.
 *  5. If the driver implements bdrv_parse_filename() and the filename came
 *     from the legacy argument, the driver parses it; "filename" is
 *     removed afterwards unless the driver needs it.
 * NOTE(review): many conditions, the initialisation of tmp_drv and all
 * return statements are missing from this excerpt.
 */
999 static int bdrv_fill_options(QDict **options, const char **pfilename,
1000 int *flags, BlockDriver *drv, Error **errp)
1002 const char *filename = *pfilename;
1003 const char *drvname;
1004 bool protocol = *flags & BDRV_O_PROTOCOL;
1005 bool parse_filename = false;
1006 BlockDriver *tmp_drv;
1007 Error *local_err = NULL;
1009 /* Parse json: pseudo-protocol */
1010 if (filename && g_str_has_prefix(filename, "json:")) {
1011 QDict *json_options = parse_json_filename(filename, &local_err);
1013 error_propagate(errp, local_err);
1017 /* Options given in the filename have lower priority than options
1018 * specified directly */
1019 qdict_join(*options, json_options, false);
1020 QDECREF(json_options);
1021 *pfilename = filename = NULL;
1024 drvname = qdict_get_try_str(*options, "driver");
1026 /* If the user has explicitly specified the driver, this choice should
1027 * override the BDRV_O_PROTOCOL flag */
1029 if (!tmp_drv && drvname) {
1030 tmp_drv = bdrv_find_format(drvname);
1033 protocol = tmp_drv->bdrv_file_open;
1037 *flags |= BDRV_O_PROTOCOL;
1039 *flags &= ~BDRV_O_PROTOCOL;
1042 /* Fetch the file name from the options QDict if necessary */
1043 if (protocol && filename) {
1044 if (!qdict_haskey(*options, "filename")) {
1045 qdict_put(*options, "filename", qstring_from_str(filename));
1046 parse_filename = true;
1048 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1054 /* Find the right block driver */
1055 filename = qdict_get_try_str(*options, "filename");
1059 error_setg(errp, "Driver specified twice");
1062 drvname = drv->format_name;
1063 qdict_put(*options, "driver", qstring_from_str(drvname));
1065 if (!drvname && protocol) {
1067 drv = bdrv_find_protocol(filename, parse_filename, errp);
1072 drvname = drv->format_name;
1073 qdict_put(*options, "driver", qstring_from_str(drvname));
1075 error_setg(errp, "Must specify either driver or file");
1078 } else if (drvname) {
1079 drv = bdrv_find_format(drvname);
1081 error_setg(errp, "Unknown driver '%s'", drvname);
1087 assert(drv || !protocol);
1089 /* Driver-specific filename parsing */
1090 if (drv && drv->bdrv_parse_filename && parse_filename) {
1091 drv->bdrv_parse_filename(filename, *options, &local_err);
1093 error_propagate(errp, local_err);
1097 if (!drv->bdrv_needs_filename) {
1098 qdict_del(*options, "filename");
/*
 * Allocates a BdrvChild, fills it from a compound literal and inserts it at
 * the head of parent_bs->children.
 * NOTE(review): the initialiser fields (presumably the child node and its
 * role) and the return statement are missing from this excerpt.
 */
1105 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1106 BlockDriverState *child_bs,
1107 const BdrvChildRole *child_role)
1109 BdrvChild *child = g_new(BdrvChild, 1);
1110 *child = (BdrvChild) {
1115 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
/*
 * Unlinks @child from the list it is on.
 * NOTE(review): releasing the BdrvChild itself is not visible in this
 * excerpt.
 */
1120 static void bdrv_detach_child(BdrvChild *child)
1122 QLIST_REMOVE(child, next);
/*
 * Drops @parent's link to @child: clears the child's inherits_from pointer
 * if it points at @parent, detaches the BdrvChild and releases one
 * reference on the child node. The node pointer is saved in child_bs
 * before the detach and used for the unref afterwards.
 */
1126 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1128 BlockDriverState *child_bs = child->bs;
1130 if (child->bs->inherits_from == parent) {
1131 child->bs->inherits_from = NULL;
1134 bdrv_detach_child(child);
1135 bdrv_unref(child_bs);
/*
 * Replaces the backing node of @bs with @backing_hd. Visible steps:
 *  - If a backing node exists, its op blockers installed through
 *    bs->backing_blocker are lifted and the backing child is detached.
 *  - Otherwise, if a new backing node is given, the blocker error object
 *    is created.
 *  - bs->backing_hd is updated. One path frees the blocker and clears
 *    backing_blocker/backing_child.
 *  - With a new backing node: the child is attached with the child_backing
 *    role, BDRV_O_NO_BACKING is cleared, backing_file and backing_format
 *    are copied from the new node, all operations on it are blocked except
 *    BLOCK_OP_TYPE_COMMIT_TARGET, and limits are refreshed.
 * NOTE(review): the condition selecting the "free the blocker" path
 * (presumably backing_hd == NULL) and the jump past the attach code are
 * missing from this excerpt.
 */
1138 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1141 if (bs->backing_hd) {
1142 assert(bs->backing_blocker);
1143 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1144 bdrv_detach_child(bs->backing_child);
1145 } else if (backing_hd) {
1146 error_setg(&bs->backing_blocker,
1147 "node is used as backing hd of '%s'",
1148 bdrv_get_device_or_node_name(bs));
1151 bs->backing_hd = backing_hd;
1153 error_free(bs->backing_blocker);
1154 bs->backing_blocker = NULL;
1155 bs->backing_child = NULL;
1158 bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing);
1159 bs->open_flags &= ~BDRV_O_NO_BACKING;
1160 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1161 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1162 backing_hd->drv ? backing_hd->drv->format_name : "");
1164 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1165 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1166 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1167 bs->backing_blocker);
1169 bdrv_refresh_limits(bs, NULL);
1173 * Opens the backing file for a BlockDriverState if not yet open
1175 * options is a QDict of options to pass to the block drivers, or NULL for an
1176 * empty set of options. The reference to the QDict is transferred to this
1177 * function (even on failure), so if the caller intends to reuse the dictionary,
1178 * it needs to use QINCREF() before calling bdrv_file_open.
/*
 * Opens the backing file of @bs. Visible steps, in order:
 *  - A PATH_MAX scratch buffer is allocated for the backing file name and
 *    freed at the end.
 *  - An already present backing node is special-cased (outcome not visible
 *    here); NULL @options become an empty QDict.
 *  - BDRV_O_NO_BACKING is cleared. If @options contain "file.filename" the
 *    scratch name stays empty; if there is neither a recorded backing file
 *    nor any option, a separate path is taken (not visible); otherwise the
 *    name is resolved with bdrv_get_full_backing_filename().
 *  - Drivers without supports_backing are rejected.
 *  - A new node is created; bs->backing_format is used as "driver" unless
 *    the options already name one; the node is opened through
 *    bdrv_open_inherit() with the child_backing role.
 *  - On failure the node is unreferenced, BDRV_O_NO_BACKING is set again
 *    and the error is wrapped in "Could not open backing file: %s".
 *  - On success bdrv_set_backing_hd() installs the node.
 * NOTE(review): several conditions, the cleanup label and the return
 * statements are missing from this excerpt.
 */
1180 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1182 char *backing_filename = g_malloc0(PATH_MAX);
1184 BlockDriverState *backing_hd;
1185 Error *local_err = NULL;
1187 if (bs->backing_hd != NULL) {
1192 /* NULL means an empty set of options */
1193 if (options == NULL) {
1194 options = qdict_new();
1197 bs->open_flags &= ~BDRV_O_NO_BACKING;
1198 if (qdict_haskey(options, "file.filename")) {
1199 backing_filename[0] = '\0';
1200 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1204 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1208 error_propagate(errp, local_err);
1214 if (!bs->drv || !bs->drv->supports_backing) {
1216 error_setg(errp, "Driver doesn't support backing files");
1221 backing_hd = bdrv_new();
1223 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1224 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1227 assert(bs->backing_hd == NULL);
1228 ret = bdrv_open_inherit(&backing_hd,
1229 *backing_filename ? backing_filename : NULL,
1230 NULL, options, 0, bs, &child_backing,
1233 bdrv_unref(backing_hd);
1235 bs->open_flags |= BDRV_O_NO_BACKING;
1236 error_setg(errp, "Could not open backing file: %s",
1237 error_get_pretty(local_err));
1238 error_free(local_err);
1242 bdrv_set_backing_hd(bs, backing_hd);
1245 g_free(backing_filename);
1250 * Opens a disk image whose options are given as BlockdevRef in another block
1253 * If allow_none is true, no image will be opened if filename is NULL and no
1254 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1256 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1257 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1258 * itself, all options starting with "${bdref_key}." are considered part of the
1261 * The BlockdevRef will be removed from the options QDict.
/*
 * Opens a child node described by a BlockdevRef inside @options. Visible
 * steps:
 *  - @child_role must be non-NULL.
 *  - All "<bdref_key>."-prefixed entries are moved out of @options into
 *    image_options; a plain string under @bdref_key is read as a
 *    reference to an existing node.
 *  - If there is no filename, no reference and no image option, either an
 *    error "A block device must be specified ..." is set or nothing is
 *    opened (the allow_none test itself is not visible), and
 *    image_options is released.
 *  - Otherwise the node is opened with bdrv_open_inherit() and attached to
 *    @parent with bdrv_attach_child().
 *  - @bdref_key is removed from @options.
 * NOTE(review): the initialisation of bs before bdrv_open_inherit(), the
 * error checks and the return statement are missing from this excerpt.
 */
1263 BdrvChild *bdrv_open_child(const char *filename,
1264 QDict *options, const char *bdref_key,
1265 BlockDriverState* parent,
1266 const BdrvChildRole *child_role,
1267 bool allow_none, Error **errp)
1269 BdrvChild *c = NULL;
1270 BlockDriverState *bs;
1271 QDict *image_options;
1273 char *bdref_key_dot;
1274 const char *reference;
1276 assert(child_role != NULL);
1278 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1279 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1280 g_free(bdref_key_dot);
1282 reference = qdict_get_try_str(options, bdref_key);
1283 if (!filename && !reference && !qdict_size(image_options)) {
1285 error_setg(errp, "A block device must be specified for \"%s\"",
1288 QDECREF(image_options);
1293 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1294 parent, child_role, NULL, errp);
1299 c = bdrv_attach_child(parent, bs, child_role);
1302 qdict_del(options, bdref_key);
1307 * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of
1308 * a BdrvChild object.
1310 * If allow_none is true, no image will be opened if filename is NULL and no
1311 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1313 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
/*
 * Wrapper around bdrv_open_child(): requires *pbs to be NULL on entry,
 * forwards all arguments and propagates any error to @errp.
 * NOTE(review): the store of the opened node into *pbs and the return
 * statements are missing from this excerpt.
 */
1315 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1316 QDict *options, const char *bdref_key,
1317 BlockDriverState* parent, const BdrvChildRole *child_role,
1318 bool allow_none, Error **errp)
1320 Error *local_err = NULL;
1324 assert(*pbs == NULL);
1326 c = bdrv_open_child(filename, options, bdref_key, parent, child_role,
1327 allow_none, &local_err);
1329 error_propagate(errp, local_err);
/*
 * Puts a temporary qcow2 overlay on top of @bs. Visible steps:
 *  1. The image length is read with bdrv_getlength(); a negative value is
 *     reported as "Could not get image size".
 *  2. get_tmp_filename() provides a file name in a PATH_MAX + 1 buffer.
 *  3. A qcow2 image of that size is created there with bdrv_create(); a
 *     failure is reported together with the temporary file name.
 *  4. A fresh options QDict selects the "file" driver and the temporary
 *     name, and a new node is opened on it as qcow2 with @flags.
 *  5. bdrv_append() inserts the new node above @bs.
 *  6. The name buffer is freed.
 * NOTE(review): the error checks after steps 2-4, the cleanup label and
 * the return statements are missing from this excerpt.
 */
1340 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1342 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1343 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1345 QemuOpts *opts = NULL;
1346 QDict *snapshot_options;
1347 BlockDriverState *bs_snapshot;
1348 Error *local_err = NULL;
1351 /* if snapshot, we create a temporary backing file and open it
1352 instead of opening 'filename' directly */
1354 /* Get the required size from the image */
1355 total_size = bdrv_getlength(bs);
1356 if (total_size < 0) {
1358 error_setg_errno(errp, -total_size, "Could not get image size");
1362 /* Create the temporary image */
1363 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1365 error_setg_errno(errp, -ret, "Could not get temporary filename");
1369 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1371 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1372 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1373 qemu_opts_del(opts);
1375 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1376 "'%s': %s", tmp_filename,
1377 error_get_pretty(local_err));
1378 error_free(local_err);
1382 /* Prepare a new options QDict for the temporary file */
1383 snapshot_options = qdict_new();
1384 qdict_put(snapshot_options, "file.driver",
1385 qstring_from_str("file"));
1386 qdict_put(snapshot_options, "file.filename",
1387 qstring_from_str(tmp_filename));
1389 bs_snapshot = bdrv_new();
1391 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1392 flags, &bdrv_qcow2, &local_err);
1394 error_propagate(errp, local_err);
1398 bdrv_append(bs_snapshot, bs);
1401 g_free(tmp_filename);
1406 * Opens a disk image (raw, qcow2, vmdk, ...)
1408 * options is a QDict of options to pass to the block drivers, or NULL for an
1409 * empty set of options. The reference to the QDict belongs to the block layer
1410 * after the call (even on failure), so if the caller intends to reuse the
1411 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1413 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1414 * If it is not NULL, the referenced BDS will be reused.
1416 * The reference parameter may be used to specify an existing block device which
1417 * should be opened. If specified, neither options nor a filename may be given,
1418 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
/*
 * Worker behind bdrv_open(): opens an image, or resolves an already existing
 * node through @reference, optionally inheriting open flags from @parent.
 *
 * Steps that are visible in this function, in order:
 *  - asserts that @child_role and @flags are not both given and that
 *    @child_role and @parent are either both set or both unset;
 *  - reference handling: reports an error when a filename or a non-empty
 *    options dict accompanies the reference, otherwise resolves the node
 *    with bdrv_lookup_bs();
 *  - a NULL @options is replaced by a fresh empty QDict;
 *  - bs->inherits_from is set to @parent and the flags are recomputed with
 *    child_role->inherit_flags(parent->open_flags);
 *  - bdrv_fill_options() is run, then the `driver` option is looked up,
 *    resolved with bdrv_find_format() and removed from the dict;
 *  - bs->options keeps the dict while the function continues to work on a
 *    shallow clone of it;
 *  - for non-protocol opens: BDRV_O_RDWR implies BDRV_O_ALLOW_RDWR,
 *    BDRV_O_SNAPSHOT is turned into snapshot_flags plus backing-file flags,
 *    and the `file` child is opened with bdrv_open_image();
 *  - find_image_format() probes the format;
 *  - bdrv_open_common() opens the image, bdrv_open_backing_file() opens the
 *    backing file unless BDRV_O_NO_BACKING is set, and
 *    bdrv_append_temp_snapshot() adds the temporary overlay when
 *    snapshot_flags is non-zero;
 *  - any option still left in the working dict is reported as unsupported;
 *  - when no key is required the media change callback is invoked,
 *    otherwise the run state is checked against PRELAUNCH, INMIGRATE and
 *    PAUSED.
 *
 * NOTE(review): the guarding conditions of several of these steps, the
 * error labels and the return statements are not visible here, so the exact
 * branch structure (and which steps are conditional) must be confirmed
 * against the complete function.
 */
1420 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1421 const char *reference, QDict *options, int flags,
1422 BlockDriverState *parent,
1423 const BdrvChildRole *child_role,
1424 BlockDriver *drv, Error **errp)
1427 BlockDriverState *file = NULL, *bs;
1428 const char *drvname;
1429 Error *local_err = NULL;
1430 int snapshot_flags = 0;
1433 assert(!child_role || !flags);
1434 assert(!child_role == !parent);
1437 bool options_non_empty = options ? qdict_size(options) : false;
1441 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1442 "another block device");
1446 if (filename || options_non_empty) {
1447 error_setg(errp, "Cannot reference an existing block device with "
1448 "additional options or a new filename");
1452 bs = bdrv_lookup_bs(reference, reference, errp);
1467 /* NULL means an empty set of options */
1468 if (options == NULL) {
1469 options = qdict_new();
1473 bs->inherits_from = parent;
1474 flags = child_role->inherit_flags(parent->open_flags);
1477 ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err);
1482 /* Find the right image format driver */
1484 drvname = qdict_get_try_str(options, "driver");
1486 drv = bdrv_find_format(drvname);
1487 qdict_del(options, "driver");
1489 error_setg(errp, "Unknown driver: '%s'", drvname);
1495 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1497 bs->open_flags = flags;
1498 bs->options = options;
1499 options = qdict_clone_shallow(options);
1501 /* Open image file without format layer */
1502 if ((flags & BDRV_O_PROTOCOL) == 0) {
1503 if (flags & BDRV_O_RDWR) {
1504 flags |= BDRV_O_ALLOW_RDWR;
1506 if (flags & BDRV_O_SNAPSHOT) {
1507 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1508 flags = bdrv_backing_flags(flags);
1511 assert(file == NULL);
1512 bs->open_flags = flags;
1513 ret = bdrv_open_image(&file, filename, options, "file",
1514 bs, &child_file, true, &local_err);
1520 /* Image format probing */
1523 ret = find_image_format(file, filename, &drv, &local_err);
1528 error_setg(errp, "Must specify either driver or file");
1533 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1534 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1535 /* file must be NULL if a protocol BDS is about to be created
1536 * (the inverse results in an error message from bdrv_open_common()) */
1537 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1539 /* Open the image */
1540 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1545 if (file && (bs->file != file)) {
1550 /* If there is a backing file, use it */
1551 if ((flags & BDRV_O_NO_BACKING) == 0) {
1552 QDict *backing_options;
1554 qdict_extract_subqdict(options, &backing_options, "backing.");
1555 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1557 goto close_and_fail;
1561 bdrv_refresh_filename(bs);
1563 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1564 * temporary snapshot afterwards. */
1565 if (snapshot_flags) {
1566 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1568 goto close_and_fail;
1572 /* Check if any unknown options were used */
1573 if (options && (qdict_size(options) != 0)) {
1574 const QDictEntry *entry = qdict_first(options);
1575 if (flags & BDRV_O_PROTOCOL) {
1576 error_setg(errp, "Block protocol '%s' doesn't support the option "
1577 "'%s'", drv->format_name, entry->key);
1579 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1580 "support the option '%s'", drv->format_name,
1581 bdrv_get_device_name(bs), entry->key);
1585 goto close_and_fail;
1588 if (!bdrv_key_required(bs)) {
1590 blk_dev_change_media_cb(bs->blk, true);
1592 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1593 && !runstate_check(RUN_STATE_INMIGRATE)
1594 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1596 "Guest must be stopped for opening of encrypted image");
1598 goto close_and_fail;
1609 QDECREF(bs->options);
1613 /* If *pbs is NULL, a new BDS has been created in this function and
1614 needs to be freed now. Otherwise, it does not need to be closed,
1615 since it has not really been opened yet. */
1619 error_propagate(errp, local_err);
1624 /* See fail path, but now the BDS has to be always closed */
1632 error_propagate(errp, local_err);
/*
 * Public entry point for opening an image. Forwards @pbs, @filename,
 * @reference, @options and @flags to bdrv_open_inherit() with a NULL parent
 * and returns its result.
 *
 * NOTE(review): the remaining arguments of the forwarded call are not
 * visible here.
 */
1637 int bdrv_open(BlockDriverState **pbs, const char *filename,
1638 const char *reference, QDict *options, int flags,
1639 BlockDriver *drv, Error **errp)
1641 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
/*
 * One element of a BlockReopenQueue: the staged reopen state of a single
 * BlockDriverState plus the QSIMPLEQ linkage used to chain the entries.
 *
 * NOTE(review): bdrv_reopen_multiple() reads and writes a `prepared` member
 * of this structure whose declaration is not visible here.
 */
1645 typedef struct BlockReopenQueueEntry {
1647 BDRVReopenState state;
1648 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1649 } BlockReopenQueueEntry;
1652 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1653 * reopen of multiple devices.
1655 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT
1656 * already performed, or alternatively may be NULL, in which case a new BlockReopenQueue will
1657 * be created and initialized. This newly created BlockReopenQueue should be
1658 * passed back in for subsequent calls that are intended to be of the same
1661 * bs is the BlockDriverState to add to the reopen queue.
1663 * flags contains the open flags for the associated bs
1665 * returns a pointer to bs_queue, which is either the newly allocated
1666 * bs_queue, or the existing bs_queue being used.
/*
 * Appends @bs to a reopen queue, after first recursing into its children.
 *
 * - A NULL @bs_queue is replaced by a newly allocated, initialised queue.
 * - BDRV_O_PROTOCOL is cleared from @flags before they are stored.
 * - For each child the flags are derived with child->role->inherit_flags()
 *   and the child is queued by a recursive call. The recursion happens
 *   before the entry for @bs is inserted at the tail, so children end up
 *   ahead of their parent in the queue.
 * - The new entry records @bs and the masked flags in its state.
 *
 * NOTE(review): the body of the inherits_from test is not visible;
 * presumably children that do not inherit from @bs are skipped. The return
 * statement is not visible either.
 */
1669 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1670 BlockDriverState *bs, int flags)
1674 BlockReopenQueueEntry *bs_entry;
1677 if (bs_queue == NULL) {
1678 bs_queue = g_new0(BlockReopenQueue, 1);
1679 QSIMPLEQ_INIT(bs_queue);
1682 /* bdrv_open() masks this flag out */
1683 flags &= ~BDRV_O_PROTOCOL;
1685 QLIST_FOREACH(child, &bs->children, next) {
1688 if (child->bs->inherits_from != bs) {
1692 child_flags = child->role->inherit_flags(flags);
1693 bdrv_reopen_queue(bs_queue, child->bs, child_flags);
1696 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1697 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1699 bs_entry->state.bs = bs;
1700 bs_entry->state.flags = flags;
1706 * Reopen multiple BlockDriverStates atomically & transactionally.
1708 * The queue passed in (bs_queue) must have been built up previously
1709 * via bdrv_reopen_queue().
1711 * Reopens all BDS specified in the queue, with the appropriate
1712 * flags. All devices are prepared for reopen, and failure of any
1713 * device will cause all device changes to be abandoned, and intermediate
1716 * If all devices prepare successfully, then the changes are committed
/*
 * Two-phase reopen over every entry of @bs_queue (which must not be NULL).
 *
 * Phase 1 calls bdrv_reopen_prepare() on each entry in queue order. A
 * failure propagates the error to @errp; a success marks the entry as
 * prepared. Phase 2 calls bdrv_reopen_commit() on each entry. The final
 * loop walks the queue with the removal-safe iterator and calls
 * bdrv_reopen_abort() on every prepared entry when ret is non-zero.
 *
 * NOTE(review): the jump out of phase 1 on failure, the release of the
 * entries and of the queue, and the return statement are not visible here.
 */
1720 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1723 BlockReopenQueueEntry *bs_entry, *next;
1724 Error *local_err = NULL;
1726 assert(bs_queue != NULL);
1730 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1731 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1732 error_propagate(errp, local_err);
1735 bs_entry->prepared = true;
1738 /* If we reach this point, we have success and just need to apply the
1741 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1742 bdrv_reopen_commit(&bs_entry->state);
1748 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1749 if (ret && bs_entry->prepared) {
1750 bdrv_reopen_abort(&bs_entry->state);
1759 /* Reopen a single BlockDriverState with the specified flags. */
/*
 * Convenience wrapper: builds a fresh queue containing @bs (and whatever
 * bdrv_reopen_queue() adds for it) with @bdrv_flags, runs
 * bdrv_reopen_multiple() on it and propagates any resulting error to @errp.
 */
1760 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1763 Error *local_err = NULL;
1764 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1766 ret = bdrv_reopen_multiple(queue, &local_err);
1767 if (local_err != NULL) {
1768 error_propagate(errp, local_err);
1775 * Prepares a BlockDriverState for reopen. All changes are staged in the
1776 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1777 * the block driver layer .bdrv_reopen_prepare()
1779 * bs is the BlockDriverState to reopen
1780 * flags are the new open flags
1781 * queue is the reopen queue
1783 * Returns 0 on success, non-zero on error. On error errp will be set
1786 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1787 * It is the responsibility of the caller to then call the abort() or
1788 * commit() for any other BDS that have been left in a prepare() state
/*
 * Generic part of the prepare phase of a reopen.
 *
 * Checks performed on the visible lines:
 *  - @reopen_state and the driver of its BDS must be non-NULL (asserted);
 *  - requesting BDRV_O_RDWR on a node whose current open_flags lack
 *    BDRV_O_ALLOW_RDWR is reported as a read-only error;
 *  - the node is flushed with bdrv_flush() and a flush error is reported;
 *  - the driver bdrv_reopen_prepare() callback is invoked when present; its
 *    own error is propagated, otherwise a generic message naming
 *    bs->filename is set;
 *  - a driver without that callback is reported as not supporting reopen.
 *
 * NOTE(review): the conditions around the error paths, the error label and
 * the return statement are not visible here.
 */
1791 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1795 Error *local_err = NULL;
1798 assert(reopen_state != NULL);
1799 assert(reopen_state->bs->drv != NULL);
1800 drv = reopen_state->bs->drv;
1802 /* if we are to stay read-only, do not allow permission change
1804 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1805 reopen_state->flags & BDRV_O_RDWR) {
1806 error_setg(errp, "Node '%s' is read only",
1807 bdrv_get_device_or_node_name(reopen_state->bs));
1812 ret = bdrv_flush(reopen_state->bs);
1814 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1819 if (drv->bdrv_reopen_prepare) {
1820 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1822 if (local_err != NULL) {
1823 error_propagate(errp, local_err);
1825 error_setg(errp, "failed while preparing to reopen image '%s'",
1826 reopen_state->bs->filename);
1831 /* It is currently mandatory to have a bdrv_reopen_prepare()
1832 * handler for each supported drv. */
1833 error_setg(errp, "Block format '%s' used by node '%s' "
1834 "does not support reopening files", drv->format_name,
1835 bdrv_get_device_or_node_name(reopen_state->bs));
1847 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1848 * makes them final by swapping the staging BlockDriverState contents into
1849 * the active BlockDriverState contents.
/*
 * Commit phase of a reopen. Runs the driver bdrv_reopen_commit() callback
 * when one exists, then copies the staged flags into the BDS: open_flags is
 * replaced, enable_write_cache is derived from a flag bit (mask not visible
 * here), and read_only becomes the negation of BDRV_O_RDWR. Finally the
 * limits are refreshed with bdrv_refresh_limits(), ignoring errors (NULL
 * errp).
 */
1851 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1855 assert(reopen_state != NULL);
1856 drv = reopen_state->bs->drv;
1857 assert(drv != NULL);
1859 /* If there are any driver level actions to take */
1860 if (drv->bdrv_reopen_commit) {
1861 drv->bdrv_reopen_commit(reopen_state);
1864 /* set BDS specific flags now */
1865 reopen_state->bs->open_flags = reopen_state->flags;
1866 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1868 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1870 bdrv_refresh_limits(reopen_state->bs, NULL);
1874 * Abort the reopen, and delete and free the staged changes in
/*
 * Abort phase of a reopen: asserts that @reopen_state and the driver of its
 * BDS are non-NULL and hands the state to the driver bdrv_reopen_abort()
 * callback when the driver provides one. No generic state is touched on the
 * visible lines.
 */
1877 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1881 assert(reopen_state != NULL);
1882 drv = reopen_state->bs->drv;
1883 assert(drv != NULL);
1885 if (drv->bdrv_reopen_abort) {
1886 drv->bdrv_reopen_abort(reopen_state);
/*
 * Closes @bs and resets it to an unopened state.
 *
 * Visible sequence: cancel the block job synchronously, drain I/O twice
 * (the second drain covers I/O left by a flush, per the inline comment),
 * notify the close notifiers, call the driver bdrv_close() callback, drop
 * the backing file (detach first, then unref), detach every child while
 * clearing inherits_from on children that inherited from @bs, reset a
 * number of per-image fields, release both option dicts, unref bs->file,
 * signal a media change (unloaded) to the BlockBackend, disable I/O
 * throttling when enabled, and reinitialise the AIO notifier list.
 *
 * NOTE(review): the conditions guarding the job cancel, the driver call and
 * the media callback, the flush between the two drains and the body of the
 * notifier loop are not visible here.
 */
1891 void bdrv_close(BlockDriverState *bs)
1893 BdrvAioNotifier *ban, *ban_next;
1896 block_job_cancel_sync(bs->job);
1898 bdrv_drain(bs); /* complete I/O */
1900 bdrv_drain(bs); /* in case flush left pending I/O */
1901 notifier_list_notify(&bs->close_notifiers, bs);
1904 BdrvChild *child, *next;
1906 bs->drv->bdrv_close(bs);
1908 if (bs->backing_hd) {
1909 BlockDriverState *backing_hd = bs->backing_hd;
1910 bdrv_set_backing_hd(bs, NULL);
1911 bdrv_unref(backing_hd);
1914 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1915 /* TODO Remove bdrv_unref() from drivers' close function and use
1916 * bdrv_unref_child() here */
1917 if (child->bs->inherits_from == bs) {
1918 child->bs->inherits_from = NULL;
1920 bdrv_detach_child(child);
1926 bs->copy_on_read = 0;
1927 bs->backing_file[0] = '\0';
1928 bs->backing_format[0] = '\0';
1929 bs->total_sectors = 0;
1933 bs->zero_beyond_eof = false;
1934 QDECREF(bs->options);
1936 QDECREF(bs->full_open_options);
1937 bs->full_open_options = NULL;
1939 if (bs->file != NULL) {
1940 bdrv_unref(bs->file);
1946 blk_dev_change_media_cb(bs->blk, false);
1949 /*throttling disk I/O limits*/
1950 if (bs->io_limits_enabled) {
1951 bdrv_io_limits_disable(bs);
1954 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1957 QLIST_INIT(&bs->aio_notifiers);
/*
 * Iterates over every BDS in the bdrv_states list and, for each one,
 * acquires and then releases the AioContext it belongs to.
 *
 * NOTE(review): the statement executed while the context is held is not
 * visible here; presumably it closes the BDS.
 */
1960 void bdrv_close_all(void)
1962 BlockDriverState *bs;
1964 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1965 AioContext *aio_context = bdrv_get_aio_context(bs);
1967 aio_context_acquire(aio_context);
1969 aio_context_release(aio_context);
1973 /* make a BlockDriverState anonymous by removing it from the bdrv_states
1974 * and graph_bdrv_states lists.
1975 Also, empty the node_name to prevent a double remove */
/*
 * Removes @bs from both global lists, if present.
 *
 * Membership in bdrv_states is tracked through device_list.tqe_prev, which
 * is reset to NULL after removal. Membership in graph_bdrv_states is
 * tracked through a non-empty node_name, which is emptied at the end, so a
 * second call finds nothing to remove.
 */
1976 void bdrv_make_anon(BlockDriverState *bs)
1979 * Take care to remove bs from bdrv_states only when it's actually
1980 * in it. Note that bs->device_list.tqe_prev is initially null,
1981 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1982 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1983 * resetting it to null on remove.
1985 if (bs->device_list.tqe_prev) {
1986 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1987 bs->device_list.tqe_prev = NULL;
1989 if (bs->node_name[0] != '\0') {
1990 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1992 bs->node_name[0] = '\0';
/*
 * Invokes the driver bdrv_rebind() callback for @bs when the BDS has a
 * driver and that driver implements the callback; otherwise does nothing.
 */
1995 static void bdrv_rebind(BlockDriverState *bs)
1997 if (bs->drv && bs->drv->bdrv_rebind) {
1998 bs->drv->bdrv_rebind(bs);
/*
 * Copies from @bs_src to @bs_dest the fields that must stay with the
 * device rather than with the image: guest block size, copy-on-read and
 * write-cache settings, throttling state (state pointer, enabled flag,
 * pending and throttled request slots, round-robin linkage and timers,
 * the latter two by memcpy), error policies, iostatus, the dirty bitmap
 * list head, the reference count, the job pointer, the device_list
 * linkage, the BlockBackend pointer and the op blocker array.
 *
 * @bs_src is left unmodified on the visible lines.
 *
 * NOTE(review): the throttle_state assignment is terminated with a comma
 * rather than a semicolon, which chains it to the following assignment via
 * the comma operator. The effect is the same, but it looks unintended.
 */
2002 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2003 BlockDriverState *bs_src)
2005 /* move some fields that need to stay attached to the device */
2008 bs_dest->guest_block_size = bs_src->guest_block_size;
2009 bs_dest->copy_on_read = bs_src->copy_on_read;
2011 bs_dest->enable_write_cache = bs_src->enable_write_cache;
2013 /* i/o throttled req */
2014 bs_dest->throttle_state = bs_src->throttle_state,
2015 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
2016 bs_dest->pending_reqs[0] = bs_src->pending_reqs[0];
2017 bs_dest->pending_reqs[1] = bs_src->pending_reqs[1];
2018 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
2019 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
2020 memcpy(&bs_dest->round_robin,
2021 &bs_src->round_robin,
2022 sizeof(bs_dest->round_robin));
2023 memcpy(&bs_dest->throttle_timers,
2024 &bs_src->throttle_timers,
2025 sizeof(ThrottleTimers));
2028 bs_dest->on_read_error = bs_src->on_read_error;
2029 bs_dest->on_write_error = bs_src->on_write_error;
2032 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
2033 bs_dest->iostatus = bs_src->iostatus;
2036 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
2038 /* reference count */
2039 bs_dest->refcnt = bs_src->refcnt;
2042 bs_dest->job = bs_src->job;
2044 /* keep the same entry in bdrv_states */
2045 bs_dest->device_list = bs_src->device_list;
2046 bs_dest->blk = bs_src->blk;
2048 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2049 sizeof(bs_dest->op_blockers));
2053 * Swap bs contents for two image chains while they are live,
2054 * while keeping required fields on the BlockDriverState that is
2055 * actually attached to a device.
2057 * This will modify the BlockDriverState fields, and swap contents
2058 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2060 * bs_new must not be attached to a BlockBackend.
2062 * This function does not create any image files.
/*
 * Exchanges the contents of @bs_new and @bs_old while keeping the
 * device-bound fields where they were.
 *
 * Visible sequence:
 *  - both nodes are taken off graph_bdrv_states when they have a node name;
 *  - the throttle group lock of @bs_old is taken when it has throttle state;
 *  - @bs_new is asserted to be unattached: no BlockBackend, no dirty
 *    bitmaps, no job, no I/O limits, no throttle state or timers;
 *  - three bdrv_move_feature_fields() calls rotate the device-bound fields
 *    through the local `tmp` so they end up back on their original node;
 *  - the same unattached assertions are repeated for @bs_new;
 *  - the throttle group lock is released and named nodes are re-inserted
 *    at the tail of graph_bdrv_states;
 *  - both tracked_requests lists must be empty (asserted), the children
 *    list heads are fixed up, and children whose inherits_from points at
 *    the other node are redirected;
 *  - bdrv_rebind() is called on both nodes.
 *
 * NOTE(review): the statements that exchange the two structures themselves
 * are not visible here; presumably they sit between the first block of
 * assertions and the feature-field rotation.
 */
2064 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2066 BlockDriverState tmp;
2072 /* The code needs to swap the node_name but simply swapping node_list won't
2073 * work so first remove the nodes from the graph list, do the swap then
2074 * insert them back if needed.
2076 if (bs_new->node_name[0] != '\0') {
2077 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2079 if (bs_old->node_name[0] != '\0') {
2080 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2083 /* If the BlockDriverState is part of a throttling group acquire
2084 * its lock since we're going to mess with the protected fields.
2085 * Otherwise there's no need to worry since no one else can touch
2087 if (bs_old->throttle_state) {
2088 throttle_group_lock(bs_old);
2091 /* bs_new must be unattached and shouldn't have anything fancy enabled */
2092 assert(!bs_new->blk);
2093 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
2094 assert(bs_new->job == NULL);
2095 assert(bs_new->io_limits_enabled == false);
2096 assert(bs_new->throttle_state == NULL);
2097 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
2103 /* there are some fields that should not be swapped, move them back */
2104 bdrv_move_feature_fields(&tmp, bs_old);
2105 bdrv_move_feature_fields(bs_old, bs_new);
2106 bdrv_move_feature_fields(bs_new, &tmp);
2108 /* bs_new must remain unattached */
2109 assert(!bs_new->blk);
2111 /* Check a few fields that should remain attached to the device */
2112 assert(bs_new->job == NULL);
2113 assert(bs_new->io_limits_enabled == false);
2114 assert(bs_new->throttle_state == NULL);
2115 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
2117 /* Release the ThrottleGroup lock */
2118 if (bs_old->throttle_state) {
2119 throttle_group_unlock(bs_old);
2122 /* insert the nodes back into the graph node list if needed */
2123 if (bs_new->node_name[0] != '\0') {
2124 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2126 if (bs_old->node_name[0] != '\0') {
2127 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2131 * Update lh_first.le_prev for non-empty lists.
2133 * The head of the op blocker list doesn't change because it is moved back
2134 * in bdrv_move_feature_fields().
2136 assert(QLIST_EMPTY(&bs_old->tracked_requests));
2137 assert(QLIST_EMPTY(&bs_new->tracked_requests));
2139 QLIST_FIX_HEAD_PTR(&bs_new->children, next);
2140 QLIST_FIX_HEAD_PTR(&bs_old->children, next);
2142 /* Update references in bs->opaque and children */
2143 QLIST_FOREACH(child, &bs_old->children, next) {
2144 if (child->bs->inherits_from == bs_new) {
2145 child->bs->inherits_from = bs_old;
2148 QLIST_FOREACH(child, &bs_new->children, next) {
2149 if (child->bs->inherits_from == bs_old) {
2150 child->bs->inherits_from = bs_new;
2154 bdrv_rebind(bs_new);
2155 bdrv_rebind(bs_old);
2159 * Add new bs contents at the top of an image chain while the chain is
2160 * live, while keeping required fields on the top layer.
2162 * This will modify the BlockDriverState fields, and swap contents
2163 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2165 * bs_new must not be attached to a BlockBackend.
2167 * This function does not create any image files.
/*
 * Puts @bs_new on top of @bs_top in two steps: bdrv_swap() exchanges the
 * contents of the two nodes, then the node now holding the old top contents
 * (@bs_new) is installed as the backing file of @bs_top.
 */
2169 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2171 bdrv_swap(bs_new, bs_top);
2173 /* The contents of 'tmp' will become bs_top, as we are
2174 * swapping bs_new and bs_top contents. */
2175 bdrv_set_backing_hd(bs_top, bs_new);
/*
 * Destroys @bs. The visible preconditions are asserted: no op blockers, a
 * reference count of zero and an empty dirty bitmap list.
 *
 * NOTE(review): the statements that close the BDS, remove it from the
 * global lists and free it are not visible here.
 */
2178 static void bdrv_delete(BlockDriverState *bs)
2181 assert(bdrv_op_blocker_is_empty(bs));
2182 assert(!bs->refcnt);
2183 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2189 /* remove from list, if necessary */
2196 * Run consistency checks on an image
2198 * Returns 0 if the check could be completed (it doesn't mean that the image is
2199 * free of errors) or -errno when an internal error occurred. The results of the
2200 * check are stored in res.
/*
 * Dispatches a consistency check to the format driver. @res is zeroed
 * before the driver bdrv_check() callback is invoked with @bs, @res and
 * @fix, and the callback result is returned.
 *
 * NOTE(review): the values returned when the BDS has no driver or the
 * driver has no check callback are not visible here.
 */
2202 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2204 if (bs->drv == NULL) {
2207 if (bs->drv->bdrv_check == NULL) {
2211 memset(res, 0, sizeof(*res));
2212 return bs->drv->bdrv_check(bs, res, fix);
/*
 * Chunk size, in sectors, used by bdrv_commit(): it bounds each allocation
 * query and sizes the bounce buffer (COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE
 * bytes).
 */
2215 #define COMMIT_BUF_SECTORS 2048
2217 /* commit COW file into the raw image */
/*
 * Copies the data of @bs into its backing file.
 *
 * Visible sequence:
 *  - requires a backing file and checks the COMMIT_SOURCE op blocker on @bs
 *    and the COMMIT_TARGET op blocker on the backing file;
 *  - remembers the read-only state and open flags of the backing file and
 *    reopens it with BDRV_O_RDWR added;
 *  - grows the backing file with bdrv_truncate() when @bs is longer;
 *  - allocates a bounce buffer of COMMIT_BUF_SECTORS sectors and walks the
 *    image in steps of n sectors as reported by bdrv_is_allocated(),
 *    reading from @bs and writing to the backing file;
 *  - calls the driver bdrv_make_empty() callback when present;
 *  - flushes the backing file;
 *  - reopens the backing file without BDRV_O_RDWR, ignoring the result.
 *
 * NOTE(review): error checks after the individual calls, the condition
 * under which a chunk is copied, the release of the buffer, the condition
 * guarding the final reopen and the return values are not visible here.
 */
2218 int bdrv_commit(BlockDriverState *bs)
2220 BlockDriver *drv = bs->drv;
2221 int64_t sector, total_sectors, length, backing_length;
2222 int n, ro, open_flags;
2224 uint8_t *buf = NULL;
2229 if (!bs->backing_hd) {
2233 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2234 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2238 ro = bs->backing_hd->read_only;
2239 open_flags = bs->backing_hd->open_flags;
2242 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2247 length = bdrv_getlength(bs);
2253 backing_length = bdrv_getlength(bs->backing_hd);
2254 if (backing_length < 0) {
2255 ret = backing_length;
2259 /* If our top snapshot is larger than the backing file image,
2260 * grow the backing file image if possible. If not possible,
2261 * we must return an error */
2262 if (length > backing_length) {
2263 ret = bdrv_truncate(bs->backing_hd, length);
2269 total_sectors = length >> BDRV_SECTOR_BITS;
2271 /* qemu_try_blockalign() for bs will choose an alignment that works for
2272 * bs->backing_hd as well, so no need to compare the alignment manually. */
2273 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2279 for (sector = 0; sector < total_sectors; sector += n) {
2280 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2285 ret = bdrv_read(bs, sector, buf, n);
2290 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2297 if (drv->bdrv_make_empty) {
2298 ret = drv->bdrv_make_empty(bs);
2306 * Make sure all data we wrote to the backing device is actually
2309 if (bs->backing_hd) {
2310 bdrv_flush(bs->backing_hd);
2318 /* ignoring error return here */
2319 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
/*
 * Runs bdrv_commit() on every BDS in bdrv_states that has both a driver and
 * a backing file, holding the AioContext of each BDS while it is processed.
 * The context is released on the early-exit path inside the loop as well as
 * at the end of each iteration.
 *
 * NOTE(review): the condition for the early exit and the return values are
 * not visible here; presumably the first commit error is returned.
 */
2325 int bdrv_commit_all(void)
2327 BlockDriverState *bs;
2329 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2330 AioContext *aio_context = bdrv_get_aio_context(bs);
2332 aio_context_acquire(aio_context);
2333 if (bs->drv && bs->backing_hd) {
2334 int ret = bdrv_commit(bs);
2336 aio_context_release(aio_context);
2340 aio_context_release(aio_context);
2348 * -EINVAL - backing format specified, but no file
2349 * -ENOSPC - can't update the backing file because no space is left in the
2351 * -ENOTSUP - format driver doesn't support changing the backing file
/*
 * Records a new backing file name and format for @bs.
 *
 * A format without a file name is rejected up front. When the driver
 * implements bdrv_change_backing_file() the change is delegated to it.
 * The in-memory backing_file and backing_format strings are then updated
 * with bounded copies, using the empty string for NULL arguments.
 *
 * NOTE(review): the error codes, the path taken when the driver lacks the
 * callback and the condition guarding the in-memory update are not visible
 * here.
 */
2353 int bdrv_change_backing_file(BlockDriverState *bs,
2354 const char *backing_file, const char *backing_fmt)
2356 BlockDriver *drv = bs->drv;
2359 /* Backing file format doesn't make sense without a backing file */
2360 if (backing_fmt && !backing_file) {
2364 if (drv->bdrv_change_backing_file != NULL) {
2365 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2371 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2372 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2378 * Finds the image layer in the chain that has 'bs' as its backing file.
2380 * active is the current topmost image.
2382 * Returns NULL if bs is not found in active's image chain,
2383 * or if active == bs.
2385 * Returns the bottommost base image if bs == NULL.
/*
 * Walks down the backing chain starting at @active until it reaches a node
 * whose backing_hd equals @bs, or until the chain ends.
 *
 * NOTE(review): the return statement is not visible here; presumably the
 * node at which the walk stopped is returned.
 */
2387 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2388 BlockDriverState *bs)
2390 while (active && bs != active->backing_hd) {
2391 active = active->backing_hd;
2397 /* Given a BDS, searches for the base layer. */
/*
 * Returns the result of bdrv_find_overlay(bs, NULL), i.e. the search for
 * the node in the chain of @bs whose backing_hd is NULL.
 */
2398 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2400 return bdrv_find_overlay(bs, NULL);
/*
 * Queue element used by bdrv_drop_intermediate() to remember one BDS that
 * is scheduled for removal from the backing chain.
 */
2403 typedef struct BlkIntermediateStates {
2404 BlockDriverState *bs;
2405 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2406 } BlkIntermediateStates;
2410 * Drops images above 'base' up to and including 'top', and sets the image
2411 * above 'top' to have base as its backing file.
2413 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2414 * information in 'bs' can be properly updated.
2416 * E.g., this will convert the following chain:
2417 * bottom <- base <- intermediate <- top <- active
2421 * bottom <- base <- active
2423 * It is allowed for bottom==base, in which case it converts:
2425 * base <- intermediate <- top <- active
2431 * If backing_file_str is non-NULL, it will be used when modifying top's
2432 * overlay image metadata.
2435 * if active == top, that is considered an error
/*
 * Removes the images from @top down to (but excluding) @base from the
 * backing chain of @active.
 *
 * Visible sequence:
 *  - both @top and @base must have a driver;
 *  - the overlay of @top is located with bdrv_find_overlay(); not finding
 *    one is an error;
 *  - nothing needs doing when that overlay already has @base as backing;
 *  - the chain is walked downwards, queueing every node on
 *    states_to_delete, until a node whose backing_hd is @base is found;
 *  - the overlay gets its backing file metadata rewritten with
 *    bdrv_change_backing_file(), using @backing_file_str or, when that is
 *    NULL, the filename of the base, and is then linked to the base with
 *    bdrv_set_backing_hd();
 *  - each queued node has its backing link cleared before it is
 *    unreferenced, so closing it does not recurse down the chain;
 *  - the queue elements are freed.
 *
 * NOTE(review): the starting point of the walk, the error checks, the exit
 * label and the return values are not visible here.
 */
2438 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2439 BlockDriverState *base, const char *backing_file_str)
2441 BlockDriverState *intermediate;
2442 BlockDriverState *base_bs = NULL;
2443 BlockDriverState *new_top_bs = NULL;
2444 BlkIntermediateStates *intermediate_state, *next;
2447 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2448 QSIMPLEQ_INIT(&states_to_delete);
2450 if (!top->drv || !base->drv) {
2454 new_top_bs = bdrv_find_overlay(active, top);
2456 if (new_top_bs == NULL) {
2457 /* we could not find the image above 'top', this is an error */
2461 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2462 * to do, no intermediate images */
2463 if (new_top_bs->backing_hd == base) {
2470 /* now we will go down through the list, and add each BDS we find
2471 * into our deletion queue, until we hit the 'base'
2473 while (intermediate) {
2474 intermediate_state = g_new0(BlkIntermediateStates, 1);
2475 intermediate_state->bs = intermediate;
2476 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2478 if (intermediate->backing_hd == base) {
2479 base_bs = intermediate->backing_hd;
2482 intermediate = intermediate->backing_hd;
2484 if (base_bs == NULL) {
2485 /* something went wrong, we did not end at the base. safely
2486 * unravel everything, and exit with error */
2490 /* success - we can delete the intermediate states, and link top->base */
2491 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2492 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2493 base_bs->drv ? base_bs->drv->format_name : "");
2497 bdrv_set_backing_hd(new_top_bs, base_bs);
2499 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2500 /* so that bdrv_close() does not recursively close the chain */
2501 bdrv_set_backing_hd(intermediate_state->bs, NULL);
2502 bdrv_unref(intermediate_state->bs);
2507 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2508 g_free(intermediate_state);
2514 * Truncate file to 'offset' bytes (needed only for file protocols)
/*
 * Resizes @bs to @offset bytes through the driver bdrv_truncate() callback.
 * Afterwards the cached sector count is refreshed with a hint of
 * offset >> BDRV_SECTOR_BITS, the dirty bitmaps are truncated to match and
 * the BlockBackend is told about the resize.
 *
 * NOTE(review): the checks made before the driver call, the condition
 * guarding the follow-up work and the return values are not visible here.
 */
2516 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2518 BlockDriver *drv = bs->drv;
2522 if (!drv->bdrv_truncate)
2527 ret = drv->bdrv_truncate(bs, offset);
2529 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2530 bdrv_dirty_bitmap_truncate(bs);
2532 blk_dev_resize_cb(bs->blk);
2539 * Length of an allocated file in bytes. Sparse files are counted by actual
2540 * allocated space. Return < 0 if error or unknown.
/*
 * Asks the driver of @bs for the allocated size when it implements
 * bdrv_get_allocated_file_size(); otherwise the query is forwarded to
 * bs->file.
 *
 * NOTE(review): the handling of a missing driver, the condition guarding
 * the recursion into bs->file and the final fallback value are not visible
 * here.
 */
2542 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2544 BlockDriver *drv = bs->drv;
2548 if (drv->bdrv_get_allocated_file_size) {
2549 return drv->bdrv_get_allocated_file_size(bs);
2552 return bdrv_get_allocated_file_size(bs->file);
2558 * Return number of sectors on success, -errno on error.
/*
 * Returns the cached bs->total_sectors. For drivers flagged with
 * has_variable_length the cached value is first refreshed through
 * refresh_total_sectors().
 *
 * NOTE(review): the handling of a missing driver and of a failed refresh is
 * not visible here.
 */
2560 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2562 BlockDriver *drv = bs->drv;
2567 if (drv->has_variable_length) {
2568 int ret = refresh_total_sectors(bs, bs->total_sectors);
2573 return bs->total_sectors;
2577 * Return length in bytes on success, -errno on error.
2578 * The length is always a multiple of BDRV_SECTOR_SIZE.
/*
 * Converts the sector count from bdrv_nb_sectors() into bytes. A count too
 * large to be multiplied by BDRV_SECTOR_SIZE without exceeding INT64_MAX is
 * turned into -EFBIG first; negative values (errors) are passed through
 * unchanged.
 */
2580 int64_t bdrv_getlength(BlockDriverState *bs)
2582 int64_t ret = bdrv_nb_sectors(bs);
2584 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2585 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2588 /* return 0 as number of sectors if no device present or error */
/*
 * Stores the sector count of @bs in *@nb_sectors_ptr, substituting 0 when
 * bdrv_nb_sectors() reports an error (negative value).
 */
2589 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2591 int64_t nb_sectors = bdrv_nb_sectors(bs);
2593 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
/*
 * Sets the error policies of @bs for read and for write failures.
 */
2596 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2597 BlockdevOnError on_write_error)
2599 bs->on_read_error = on_read_error;
2600 bs->on_write_error = on_write_error;
/*
 * Returns the read error policy of @bs when @is_read is true, otherwise the
 * write error policy.
 */
2603 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2605 return is_read ? bs->on_read_error : bs->on_write_error;
/*
 * Maps the configured error policy for the failed direction to the action
 * to take for @error:
 *   ENOSPC policy -> STOP when @error is ENOSPC, REPORT otherwise
 *   STOP policy   -> STOP
 *   REPORT policy -> REPORT
 *   IGNORE policy -> IGNORE
 *
 * @error is compared against the positive errno value ENOSPC.
 *
 * NOTE(review): the switch header and any default case are not visible
 * here.
 */
2608 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2610 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2613 case BLOCKDEV_ON_ERROR_ENOSPC:
2614 return (error == ENOSPC) ?
2615 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2616 case BLOCKDEV_ON_ERROR_STOP:
2617 return BLOCK_ERROR_ACTION_STOP;
2618 case BLOCKDEV_ON_ERROR_REPORT:
2619 return BLOCK_ERROR_ACTION_REPORT;
2620 case BLOCKDEV_ON_ERROR_IGNORE:
2621 return BLOCK_ERROR_ACTION_IGNORE;
/*
 * Emits the block I/O error QMP event for @bs. The event carries the device
 * name, the operation type derived from @is_read, the chosen @action,
 * whether iostatus tracking is enabled, whether @error is ENOSPC, and the
 * strerror() text of @error.
 *
 * NOTE(review): the trailing argument(s) of the event call are not visible
 * here.
 */
2627 static void send_qmp_error_event(BlockDriverState *bs,
2628 BlockErrorAction action,
2629 bool is_read, int error)
2631 IoOperationType optype;
2633 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2634 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2635 bdrv_iostatus_is_enabled(bs),
2636 error == ENOSPC, strerror(error),
2640 /* This is done by device models because, while the block layer knows
2641 * about the error, it does not know whether an operation comes from
2642 * the device or the block layer (from a job, for example).
/*
 * Carries out @action for an I/O error on @bs.
 *
 * For the STOP action the order is: record the error in the iostatus,
 * prepare the VM stop request, send the QMP error event, then request the
 * VM stop with RUN_STATE_IO_ERROR. On the other visible path only the QMP
 * error event is sent.
 *
 * NOTE(review): statements before the STOP test and the branch keyword that
 * separates the two paths are not visible here.
 */
2644 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2645 bool is_read, int error)
2649 if (action == BLOCK_ERROR_ACTION_STOP) {
2650 /* First set the iostatus, so that "info block" returns an iostatus
2651 * that matches the events raised so far (an additional error iostatus
2652 * is fine, but not a lost one).
2654 bdrv_iostatus_set_err(bs, error);
2656 /* Then raise the request to stop the VM and the event.
2657 * qemu_system_vmstop_request_prepare has two effects. First,
2658 * it ensures that the STOP event always comes after the
2659 * BLOCK_IO_ERROR event. Second, it ensures that even if management
2660 * can observe the STOP event and do a "cont" before the STOP
2661 * event is issued, the VM will not stop. In this case, vm_start()
2662 * also ensures that the STOP/RESUME pair of events is emitted.
2664 qemu_system_vmstop_request_prepare();
2665 send_qmp_error_event(bs, action, is_read, error);
2666 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2668 send_qmp_error_event(bs, action, is_read, error);
/* Returns the read_only field of @bs. */
2672 int bdrv_is_read_only(BlockDriverState *bs)
2674 return bs->read_only;
/*
 * NOTE(review): only the signature is visible here. Judging by the name it
 * presumably reports whether @bs is a SCSI generic device; confirm against
 * the function body.
 */
2677 int bdrv_is_sg(BlockDriverState *bs)
/*
 * Returns the enable_write_cache field of @bs. Despite its name this is a
 * getter; the setter is bdrv_set_enable_write_cache().
 */
2682 int bdrv_enable_write_cache(BlockDriverState *bs)
2684 return bs->enable_write_cache;
/*
 * Sets the enable_write_cache field of @bs to @wce and mirrors the setting
 * into the BDRV_O_CACHE_WB bit of open_flags, so that a later reopen based
 * on those flags keeps it.
 *
 * NOTE(review): the condition selecting between setting and clearing the
 * bit is not visible here; presumably it tests @wce.
 */
2687 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2689 bs->enable_write_cache = wce;
2691 /* so a reopen() will preserve wce */
2693 bs->open_flags |= BDRV_O_CACHE_WB;
2695 bs->open_flags &= ~BDRV_O_CACHE_WB;
/*
 * Reports whether @bs is encrypted. An encrypted direct backing file is
 * checked first; otherwise the encrypted field of @bs itself is returned.
 *
 * NOTE(review): the value returned for an encrypted backing file is not
 * visible here.
 */
2699 int bdrv_is_encrypted(BlockDriverState *bs)
2701 if (bs->backing_hd && bs->backing_hd->encrypted)
2703 return bs->encrypted;
/*
 * Reports whether a key still has to be supplied. An encrypted direct
 * backing file without a valid key is checked first; otherwise the result
 * is whether @bs itself is encrypted and lacks a valid key.
 *
 * NOTE(review): the value returned for the backing file case is not visible
 * here.
 */
2706 int bdrv_key_required(BlockDriverState *bs)
2708 BlockDriverState *backing_hd = bs->backing_hd;
2710 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2712 return (bs->encrypted && !bs->valid_key);
/*
 * Supplies @key to @bs. An encrypted backing file receives the key first
 * through a recursive call. For @bs itself the visible branches distinguish
 * an unencrypted image, a missing driver or missing bdrv_set_key()
 * callback, and the driver call. When the image had no valid key before,
 * the media change callback that was skipped at open time is invoked.
 *
 * NOTE(review): the return codes of the individual branches, the updates of
 * valid_key and the test on the result of the driver call are not visible
 * here.
 */
2715 int bdrv_set_key(BlockDriverState *bs, const char *key)
2718 if (bs->backing_hd && bs->backing_hd->encrypted) {
2719 ret = bdrv_set_key(bs->backing_hd, key);
2725 if (!bs->encrypted) {
2727 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2730 ret = bs->drv->bdrv_set_key(bs, key);
2733 } else if (!bs->valid_key) {
2736 /* call the change callback now, we skipped it on open */
2737 blk_dev_change_media_cb(bs->blk, true);
2744 * Provide an encryption key for @bs.
2745 * If @key is non-null:
2746 * If @bs is not encrypted, fail.
2747 * Else if the key is invalid, fail.
2748 * Else set @bs's key to @key, replacing the existing key, if any.
2750 * If @bs is encrypted and still lacks a key, fail.
2752 * On failure, store an error object through @errp if non-null.
/*
 * Error-reporting wrapper around bdrv_set_key().
 *
 * Visible branches: an unencrypted node is reported as not encrypted; a
 * negative result from bdrv_set_key() is reported as an invalid password;
 * a node that still requires a key is reported with the
 * ERROR_CLASS_DEVICE_ENCRYPTED class, naming the node and its encrypted
 * file.
 *
 * NOTE(review): the tests on @key that select between these branches are
 * not visible here.
 */
2754 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2757 if (!bdrv_is_encrypted(bs)) {
2758 error_setg(errp, "Node '%s' is not encrypted",
2759 bdrv_get_device_or_node_name(bs));
2760 } else if (bdrv_set_key(bs, key) < 0) {
2761 error_setg(errp, QERR_INVALID_PASSWORD);
2764 if (bdrv_key_required(bs)) {
2765 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2766 "'%s' (%s) is encrypted",
2767 bdrv_get_device_or_node_name(bs),
2768 bdrv_get_encrypted_filename(bs));
/*
 * Returns the format name of the driver of @bs, or NULL when the BDS has no
 * driver.
 */
2773 const char *bdrv_get_format_name(BlockDriverState *bs)
2775 return bs->drv ? bs->drv->format_name : NULL;
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes the comparator pointers to the array elements, so for an
 * array of `const char *` both arguments really are `const char *const *`.
 * They have to be dereferenced once before strcmp() is called; passing them
 * to strcmp() directly would compare the bytes of the stored pointers
 * instead of the strings they point to.
 *
 * Returns a value less than, equal to or greater than zero, as strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
/*
 * Calls @it once for every format name found among the registered block
 * drivers.
 *
 * The names are first collected into a growable array: drivers without a
 * format_name are skipped and each candidate is compared against the names
 * already collected, scanning from the end. The array is then sorted with
 * qsort() and the callback is invoked for each element in sorted order.
 *
 * NOTE(review): qsort() passes pointers to the array elements to its
 * comparator, so the comparator must dereference its arguments to reach
 * the strings. The condition that skips duplicates and the release of the
 * array are not visible here.
 */
2783 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2789 const char **formats = NULL;
2791 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2792 if (drv->format_name) {
2795 while (formats && i && !found) {
2796 found = !strcmp(formats[--i], drv->format_name);
2800 formats = g_renew(const char *, formats, count + 1);
2801 formats[count++] = drv->format_name;
2806 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2808 for (i = 0; i < count; i++) {
2809 it(opaque, formats[i]);
2815 /* This function is to find a node in the bs graph */
/*
 * Searches graph_bdrv_states for a BDS whose node_name equals @node_name
 * (exact strcmp() match).
 *
 * NOTE(review): the return statements, and any check made on @node_name
 * before the loop, are not visible here.
 */
2816 BlockDriverState *bdrv_find_node(const char *node_name)
2818 BlockDriverState *bs;
2822 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2823 if (!strcmp(node_name, bs->node_name)) {
2830 /* Put this QMP function here so it can access the static graph_bdrv_states. */
/*
 * Build a BlockDeviceInfoList with one entry per node in graph_bdrv_states.
 * Each entry's value comes from bdrv_block_device_info(bs, @errp); the
 * partially built list is released with qapi_free_BlockDeviceInfoList() on
 * the visible failure path.
 * NOTE(review): list linking and return statements are not visible here.
 */
2831 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2833 BlockDeviceInfoList *list, *entry;
2834 BlockDriverState *bs;
2837 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2838 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2840 qapi_free_BlockDeviceInfoList(list);
2843 entry = g_malloc0(sizeof(*entry));
2844 entry->value = info;
/*
 * Resolve a BlockDriverState from a device name and/or a node name.
 * @device is looked up with blk_by_name() and @node_name with
 * bdrv_find_node(); when the lookup fails an error of the form
 * "Cannot find device=... nor node_name=..." is set, with NULL names
 * printed as empty strings.
 * NOTE(review): the guards and return statements are not visible here.
 */
2852 BlockDriverState *bdrv_lookup_bs(const char *device,
2853 const char *node_name,
2857 BlockDriverState *bs;
2860 blk = blk_by_name(device);
2868 bs = bdrv_find_node(node_name);
2875 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2876 device ? device : "",
2877 node_name ? node_name : "");
2881 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2882 * return false. If either argument is NULL, return false. */
/*
 * Follow the backing_hd links starting at @top until either @base is
 * reached or the chain ends (top becomes NULL).
 * NOTE(review): the return statement is not visible in this excerpt.
 */
2883 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2885 while (top && top != base) {
2886 top = top->backing_hd;
/*
 * Iterator over graph_bdrv_states: yields either the first element of the
 * list or the element following @bs on node_list.
 * NOTE(review): the condition choosing between the two returns is not
 * visible; presumably a NULL @bs starts the iteration -- confirm.
 */
2892 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2895 return QTAILQ_FIRST(&graph_bdrv_states);
2897 return QTAILQ_NEXT(bs, node_list);
/*
 * Iterator over bdrv_states: yields either the first element of the list
 * or the element following @bs on device_list.
 * NOTE(review): the condition choosing between the two returns is not
 * visible; presumably a NULL @bs starts the iteration -- confirm.
 */
2900 BlockDriverState *bdrv_next(BlockDriverState *bs)
2903 return QTAILQ_FIRST(&bdrv_states);
2905 return QTAILQ_NEXT(bs, device_list);
/* Return the node name stored in @bs (bs->node_name). */
2908 const char *bdrv_get_node_name(const BlockDriverState *bs)
2910 return bs->node_name;
2913 /* TODO check what callers really want: bs->node_name or blk_name() */
/*
 * Return the name of the BlockBackend attached to @bs (blk_name()), or an
 * empty string when @bs has no BlockBackend.  Never returns NULL.
 */
2914 const char *bdrv_get_device_name(const BlockDriverState *bs)
2916 return bs->blk ? blk_name(bs->blk) : "";
2919 /* This can be used to identify nodes that might not have a device
2920 * name associated. Since node and device names live in the same
2921 * namespace, the result is unambiguous. The exception is if both are
2922 * absent, then this returns an empty (non-null) string. */
/*
 * Return the BlockBackend name when @bs has a BlockBackend attached,
 * otherwise fall back to bs->node_name.
 */
2923 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2925 return bs->blk ? blk_name(bs->blk) : bs->node_name;
/* Return the flags @bs was opened with (bs->open_flags). */
2928 int bdrv_get_flags(BlockDriverState *bs)
2930 return bs->open_flags;
/*
 * NOTE(review): only the signature is visible in this excerpt.  Presumably
 * a constant "always zero-initialized" helper usable as a driver's
 * bdrv_has_zero_init callback -- confirm against the full file.
 */
2933 int bdrv_has_zero_init_1(BlockDriverState *bs)
/*
 * Report whether the image behind @bs is zero-initialized.
 * An image with a backing file is special-cased (its initial contents come
 * from the base image); otherwise the answer is delegated to the driver's
 * bdrv_has_zero_init callback when the driver provides one.
 * NOTE(review): the values returned on the other paths are not visible.
 */
2938 int bdrv_has_zero_init(BlockDriverState *bs)
2942 /* If BS is a copy on write image, it is initialized to
2943 the contents of the base image, which may not be zeroes. */
2944 if (bs->backing_hd) {
2947 if (bs->drv->bdrv_has_zero_init) {
2948 return bs->drv->bdrv_has_zero_init(bs);
/*
 * Report the driver's unallocated_blocks_are_zero property for @bs, as
 * filled in by a successful bdrv_get_info() call.  Images with a backing
 * file take a separate path.
 * NOTE(review): the values returned on the backing-file and failure paths
 * are not visible in this excerpt.
 */
2955 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2957 BlockDriverInfo bdi;
2959 if (bs->backing_hd) {
2963 if (bdrv_get_info(bs, &bdi) == 0) {
2964 return bdi.unallocated_blocks_are_zero;
/*
 * Report the driver's can_write_zeroes_with_unmap property for @bs, as
 * filled in by a successful bdrv_get_info() call.  A separate path is taken
 * when @bs has a backing file or was opened without BDRV_O_UNMAP.
 * NOTE(review): the values returned on those other paths are not visible.
 */
2970 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2972 BlockDriverInfo bdi;
2974 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2978 if (bdrv_get_info(bs, &bdi) == 0) {
2979 return bdi.can_write_zeroes_with_unmap;
/*
 * Return the file name of the encrypted image associated with @bs:
 * bs->backing_file when the backing image is encrypted, otherwise
 * bs->filename when @bs itself is encrypted.
 * NOTE(review): the result when neither is encrypted is not visible here.
 */
2985 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2987 if (bs->backing_hd && bs->backing_hd->encrypted)
2988 return bs->backing_file;
2989 else if (bs->encrypted)
2990 return bs->filename;
/*
 * Copy bs->backing_file into the caller's buffer @filename, bounded by
 * @filename_size, using pstrcpy().
 */
2995 void bdrv_get_backing_filename(BlockDriverState *bs,
2996 char *filename, int filename_size)
2998 pstrcpy(filename, filename_size, bs->backing_file);
/*
 * Fill *@bdi with driver information for @bs.  *@bdi is zeroed before the
 * driver's bdrv_get_info callback is invoked, and the callback's result is
 * returned.
 * NOTE(review): the handling of a driver without the callback (and of a
 * missing driver) is on lines not visible in this excerpt.
 */
3001 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3003 BlockDriver *drv = bs->drv;
3006 if (!drv->bdrv_get_info)
3008 memset(bdi, 0, sizeof(*bdi));
3009 return drv->bdrv_get_info(bs, bdi);
/*
 * Return the driver-specific image information for @bs by delegating to
 * the driver's bdrv_get_specific_info callback, when @bs has a driver that
 * implements it.
 * NOTE(review): the fallback return is not visible in this excerpt.
 */
3012 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3014 BlockDriver *drv = bs->drv;
3015 if (drv && drv->bdrv_get_specific_info) {
3016 return drv->bdrv_get_specific_info(bs);
/*
 * Forward the debug @event to the driver's bdrv_debug_event callback.
 * A NULL @bs, a missing driver or a missing callback is guarded against
 * before the call.
 */
3021 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
3023 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3027 bs->drv->bdrv_debug_event(bs, event);
/*
 * Set a debug breakpoint: searches, starting at @bs, for a node whose
 * driver implements bdrv_debug_breakpoint and returns that callback's
 * result for (@event, tag).
 * NOTE(review): the loop body (how the search advances) and the fallback
 * return value are not visible in this excerpt.
 */
3030 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3033 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3037 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3038 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
/*
 * Remove the debug breakpoint identified by @tag: searches, starting at
 * @bs, for a node whose driver implements bdrv_debug_remove_breakpoint and
 * returns that callback's result.
 * NOTE(review): the loop body and the fallback return value are not
 * visible in this excerpt.
 */
3044 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3046 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3050 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3051 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
/*
 * Resume the request suspended under @tag: searches, starting at @bs, for
 * a node whose driver implements bdrv_debug_resume and returns that
 * callback's result.  Unlike the sibling helpers, the search loop here
 * also keeps going past nodes that have no driver at all.
 * NOTE(review): the loop body and the fallback return value are not
 * visible in this excerpt.
 */
3057 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3059 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3063 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3064 return bs->drv->bdrv_debug_resume(bs, tag);
/*
 * Query whether a request is suspended under @tag: searches, starting at
 * @bs, for a node whose driver implements bdrv_debug_is_suspended and
 * returns that callback's result.
 * NOTE(review): the loop body and the fallback return value are not
 * visible in this excerpt.
 */
3070 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3072 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3076 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3077 return bs->drv->bdrv_debug_is_suspended(bs, tag);
/*
 * Return 1 if @bs was opened with BDRV_O_SNAPSHOT set in its open flags,
 * 0 otherwise (the double negation normalizes the masked bit to 0/1).
 */
3083 int bdrv_is_snapshot(BlockDriverState *bs)
3085 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3088 /* backing_file can either be relative, or absolute, or a protocol. If it is
3089 * relative, it must be relative to the chain. So, passing in bs->filename
3090 * from a BDS as backing_file should not be done, as that may be relative to
3091 * the CWD rather than the chain. */
/*
 * Search the backing chain of @bs for the image referred to by
 * @backing_file and yield the matching backing BlockDriverState (retval,
 * initialized to NULL).
 *
 * For every link of the chain:
 *  - if either @backing_file or the link's recorded backing_file uses a
 *    protocol prefix, the two strings are compared verbatim;
 *  - otherwise both names are combined with the current image's filename
 *    via path_combine() and canonicalized with realpath() before being
 *    compared.
 *
 * Three PATH_MAX-sized scratch buffers are allocated with g_malloc() and
 * released before returning.  A NULL @bs, a @bs without driver, or a NULL
 * @backing_file is guarded against up front.
 * NOTE(review): the break/continue/return lines are not visible here.
 */
3092 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3093 const char *backing_file)
3095 char *filename_full = NULL;
3096 char *backing_file_full = NULL;
3097 char *filename_tmp = NULL;
3098 int is_protocol = 0;
3099 BlockDriverState *curr_bs = NULL;
3100 BlockDriverState *retval = NULL;
3102 if (!bs || !bs->drv || !backing_file) {
3106 filename_full = g_malloc(PATH_MAX);
3107 backing_file_full = g_malloc(PATH_MAX);
3108 filename_tmp = g_malloc(PATH_MAX);
3110 is_protocol = path_has_protocol(backing_file);
3112 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
3114 /* If either of the filename paths is actually a protocol, then
3115 * compare unmodified paths; otherwise make paths relative */
3116 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3117 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3118 retval = curr_bs->backing_hd;
3122 /* If not an absolute filename path, make it relative to the current
3123 * image's filename path */
3124 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3127 /* We are going to compare absolute pathnames */
3128 if (!realpath(filename_tmp, filename_full)) {
3132 /* We need to make sure the backing filename we are comparing against
3133 * is relative to the current image filename (or absolute) */
3134 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3135 curr_bs->backing_file);
3137 if (!realpath(filename_tmp, backing_file_full)) {
3141 if (strcmp(backing_file_full, filename_full) == 0) {
3142 retval = curr_bs->backing_hd;
3148 g_free(filename_full);
3149 g_free(backing_file_full);
3150 g_free(filename_tmp);
/*
 * Compute the length of the backing chain below @bs recursively: one more
 * than the depth of bs->backing_hd.
 * NOTE(review): the base-case return (no backing_hd) and any earlier guard
 * are on lines not visible in this excerpt.
 */
3154 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3160 if (!bs->backing_hd) {
3164 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
/* Run the initializers registered for MODULE_INIT_BLOCK. */
3167 void bdrv_init(void)
3169 module_call_init(MODULE_INIT_BLOCK);
/*
 * Turn on the driver whitelist (use_bdrv_whitelist = 1).
 * NOTE(review): any further initialization call is not visible here.
 */
3172 void bdrv_init_with_whitelist(void)
3174 use_bdrv_whitelist = 1;
/*
 * Invalidate cached state of @bs, reporting failure through @errp.
 *
 * Visible steps: the BDRV_O_INCOMING flag is tested and then cleared from
 * bs->open_flags; the driver's bdrv_invalidate_cache callback is used when
 * present, otherwise the call recurses into bs->file; a local error is
 * propagated to @errp; finally the total sector count is refreshed with
 * refresh_total_sectors(), whose failure is reported as
 * "Could not refresh total sector count".
 * NOTE(review): early-return lines are not visible in this excerpt.
 */
3178 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3180 Error *local_err = NULL;
3187 if (!(bs->open_flags & BDRV_O_INCOMING)) {
3190 bs->open_flags &= ~BDRV_O_INCOMING;
3192 if (bs->drv->bdrv_invalidate_cache) {
3193 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3194 } else if (bs->file) {
3195 bdrv_invalidate_cache(bs->file, &local_err);
3198 error_propagate(errp, local_err);
3202 ret = refresh_total_sectors(bs, bs->total_sectors);
3204 error_setg_errno(errp, -ret, "Could not refresh total sector count");
/*
 * Call bdrv_invalidate_cache() on every BlockDriverState in bdrv_states.
 * Each call is bracketed by acquiring and releasing that node's own
 * AioContext.  A local error is propagated to @errp.
 * NOTE(review): whether iteration stops at the first error is decided on
 * lines not visible in this excerpt.
 */
3209 void bdrv_invalidate_cache_all(Error **errp)
3211 BlockDriverState *bs;
3212 Error *local_err = NULL;
3214 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3215 AioContext *aio_context = bdrv_get_aio_context(bs);
3217 aio_context_acquire(aio_context);
3218 bdrv_invalidate_cache(bs, &local_err);
3219 aio_context_release(aio_context);
3221 error_propagate(errp, local_err);
3227 /**************************************************************/
3228 /* removable device support */
3231 * Return TRUE if the media is present
/*
 * Ask the driver of @bs whether a medium is present, via its
 * bdrv_is_inserted callback.
 * NOTE(review): the results for a missing driver or a driver without the
 * callback are on lines not visible in this excerpt.
 */
3233 int bdrv_is_inserted(BlockDriverState *bs)
3235 BlockDriver *drv = bs->drv;
3239 if (!drv->bdrv_is_inserted)
3241 return drv->bdrv_is_inserted(bs);
3245 * Return whether the media changed since the last call to this
3246 * function, or -ENOTSUP if we don't know. Most drivers don't know.
/*
 * Delegate to the driver's bdrv_media_changed callback when @bs has a
 * driver that implements it, returning the callback's result.
 * NOTE(review): the fallback return is not visible in this excerpt.
 */
3248 int bdrv_media_changed(BlockDriverState *bs)
3250 BlockDriver *drv = bs->drv;
3252 if (drv && drv->bdrv_media_changed) {
3253 return drv->bdrv_media_changed(bs);
3259 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
/*
 * Eject the medium (@eject_flag true) or close the tray (@eject_flag
 * false).  The request is passed to the driver's bdrv_eject callback when
 * one exists; then, if @bs has a non-empty device name, a tray-moved event
 * is emitted through qapi_event_send_device_tray_moved().
 */
3261 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3263 BlockDriver *drv = bs->drv;
3264 const char *device_name;
3266 if (drv && drv->bdrv_eject) {
3267 drv->bdrv_eject(bs, eject_flag);
3270 device_name = bdrv_get_device_name(bs);
3271 if (device_name[0] != '\0') {
3272 qapi_event_send_device_tray_moved(device_name,
3273 eject_flag, &error_abort);
3278 * Lock or unlock the media (if it is locked, the user won't be able
3279 * to eject it manually).
/*
 * Lock (@locked true) or unlock the medium of @bs.  The call is traced and
 * then forwarded to the driver's bdrv_lock_medium callback when the driver
 * provides one.
 */
3281 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3283 BlockDriver *drv = bs->drv;
3285 trace_bdrv_lock_medium(bs, locked);
3287 if (drv && drv->bdrv_lock_medium) {
3288 drv->bdrv_lock_medium(bs, locked);
/* Record @align as the guest block size of @bs (bs->guest_block_size). */
3292 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
3294 bs->guest_block_size = align;
/*
 * Look for a dirty bitmap called @name among bs->dirty_bitmaps.  Bitmaps
 * without a name (bm->name == NULL) never match.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching bitmap, or NULL when none matches.
 */
3297 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3299 BdrvDirtyBitmap *bm;
3302 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3303 if (bm->name && !strcmp(name, bm->name)) {
/*
 * Turn @bitmap into an anonymous bitmap: its name is freed and reset to
 * NULL.  The bitmap must not be frozen (asserted).
 */
3310 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3312 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3313 g_free(bitmap->name);
3314 bitmap->name = NULL;
/*
 * Create a dirty bitmap on @bs with the given byte @granularity and
 * optional name, and insert it at the head of bs->dirty_bitmaps.
 *
 * @granularity is asserted to satisfy (g & (g - 1)) == 0 and, once shifted
 * right by BDRV_SECTOR_BITS to obtain sectors, to be non-zero.  A non-NULL
 * name that is already in use on @bs is rejected with "Bitmap already
 * exists".  The bitmap covers bdrv_nb_sectors(@bs) sectors; a negative
 * sector count is reported as an error and mirrored into errno.  The new
 * bitmap starts out enabled (disabled = false).
 * NOTE(review): the return statements are not visible in this excerpt.
 */
3317 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3318 uint32_t granularity,
3322 int64_t bitmap_size;
3323 BdrvDirtyBitmap *bitmap;
3324 uint32_t sector_granularity;
3326 assert((granularity & (granularity - 1)) == 0);
3328 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3329 error_setg(errp, "Bitmap already exists: %s", name);
3332 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3333 assert(sector_granularity);
3334 bitmap_size = bdrv_nb_sectors(bs);
3335 if (bitmap_size < 0) {
3336 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3337 errno = -bitmap_size;
3340 bitmap = g_new0(BdrvDirtyBitmap, 1);
3341 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3342 bitmap->size = bitmap_size;
3343 bitmap->name = g_strdup(name);
3344 bitmap->disabled = false;
3345 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
/* A bitmap is frozen exactly when it has a successor installed. */
3349 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3351 return bitmap->successor;
/*
 * A bitmap is enabled when it is neither marked disabled nor frozen (i.e.
 * it has no successor).
 */
3354 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3356 return !(bitmap->disabled || bitmap->successor);
/*
 * Map the state of @bitmap to a DirtyBitmapStatus: FROZEN when it has a
 * successor, DISABLED when it is not enabled, ACTIVE otherwise.  The frozen
 * check comes first, so a frozen bitmap is never reported as DISABLED.
 */
3359 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3361 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3362 return DIRTY_BITMAP_STATUS_FROZEN;
3363 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3364 return DIRTY_BITMAP_STATUS_DISABLED;
3366 return DIRTY_BITMAP_STATUS_ACTIVE;
3371 * Create a successor bitmap destined to replace this bitmap after an operation.
3372 * Requires that the bitmap is not frozen and has no successor.
/*
 * Give @bitmap an anonymous successor with the same granularity, which
 * freezes @bitmap.
 *
 * A bitmap that is already frozen is rejected with an error.  The
 * successor is created through bdrv_create_dirty_bitmap() with a NULL name
 * and inherits the parent's 'disabled' flag before being installed as
 * bitmap->successor.
 * NOTE(review): the return statements and the check of the creation result
 * are not visible in this excerpt.
 */
3374 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3375 BdrvDirtyBitmap *bitmap, Error **errp)
3377 uint64_t granularity;
3378 BdrvDirtyBitmap *child;
3380 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3381 error_setg(errp, "Cannot create a successor for a bitmap that is "
3382 "currently frozen");
3385 assert(!bitmap->successor);
3387 /* Create an anonymous successor */
3388 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3389 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3394 /* Successor will be on or off based on our current state. */
3395 child->disabled = bitmap->disabled;
3397 /* Install the successor and freeze the parent */
3398 bitmap->successor = child;
3403 * For a bitmap with a successor, yield our name to the successor,
3404 * delete the old bitmap, and return a handle to the new bitmap.
/*
 * Replace @bitmap by its successor.
 *
 * Without a successor an error is set.  Otherwise ownership of the name
 * string moves from @bitmap to the successor (bitmap->name is cleared so
 * the string is not freed with the old bitmap), the successor link is
 * cleared -- which un-freezes @bitmap so it may be released -- and @bitmap
 * is released via bdrv_release_dirty_bitmap().
 * NOTE(review): the return statements are not visible in this excerpt.
 */
3406 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3407 BdrvDirtyBitmap *bitmap,
3411 BdrvDirtyBitmap *successor = bitmap->successor;
3413 if (successor == NULL) {
3414 error_setg(errp, "Cannot relinquish control if "
3415 "there's no successor present");
3419 name = bitmap->name;
3420 bitmap->name = NULL;
3421 successor->name = name;
3422 bitmap->successor = NULL;
3423 bdrv_release_dirty_bitmap(bs, bitmap);
3429 * In cases of failure where we can no longer safely delete the parent,
3430 * we may wish to re-join the parent and child/successor.
3431 * The merged parent will be un-frozen, but not explicitly re-enabled.
/*
 * Merge the successor of @parent back into @parent and drop the successor.
 *
 * Visible error paths: no successor present, and hbitmap_merge() failing.
 * On success the successor is released via bdrv_release_dirty_bitmap() and
 * parent->successor is cleared, which un-freezes @parent.
 * NOTE(review): the guard condition and return statements are not visible
 * in this excerpt.
 */
3433 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3434 BdrvDirtyBitmap *parent,
3437 BdrvDirtyBitmap *successor = parent->successor;
3440 error_setg(errp, "Cannot reclaim a successor when none is present");
3444 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3445 error_setg(errp, "Merging of parent and successor bitmap failed");
3448 bdrv_release_dirty_bitmap(bs, successor);
3449 parent->successor = NULL;
3455 * Truncates _all_ bitmaps attached to a BDS.
/*
 * Resize every dirty bitmap attached to @bs to the current sector count of
 * @bs (bdrv_nb_sectors()), updating both the hbitmap and the cached size.
 * None of the bitmaps may be frozen (asserted per bitmap).
 */
3457 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3459 BdrvDirtyBitmap *bitmap;
3460 uint64_t size = bdrv_nb_sectors(bs);
3462 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3463 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3464 hbitmap_truncate(bitmap->bitmap, size);
3465 bitmap->size = size;
/*
 * Detach @bitmap from @bs and free its resources.
 *
 * bs->dirty_bitmaps is scanned with the _SAFE iterator because the list is
 * modified during the walk.  The visible steps assert that the bitmap is
 * not frozen, unlink it from the list, and free its hbitmap and its name.
 * NOTE(review): the comparison selecting the entry, the freeing of the
 * bitmap structure itself and the early return are not visible here.
 */
3469 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3471 BdrvDirtyBitmap *bm, *next;
3472 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3474 assert(!bdrv_dirty_bitmap_frozen(bm));
3475 QLIST_REMOVE(bitmap, list);
3476 hbitmap_free(bitmap->bitmap);
3477 g_free(bitmap->name);
/* Mark @bitmap as disabled.  The bitmap must not be frozen (asserted). */
3484 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3486 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3487 bitmap->disabled = true;
/* Clear the disabled mark of @bitmap.  It must not be frozen (asserted). */
3490 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3492 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3493 bitmap->disabled = false;
/*
 * Build a BlockDirtyInfoList describing every dirty bitmap of @bs.
 *
 * Each entry records the dirty count, the granularity, the name (has_name
 * is false and name is NULL for anonymous bitmaps, since g_strdup(NULL)
 * yields NULL) and the status of one bitmap.  'plist' tracks the link to
 * fill next so entries are appended in list-walk order.
 * NOTE(review): the line storing the entry through 'plist' and the return
 * statement are not visible in this excerpt.
 */
3496 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3498 BdrvDirtyBitmap *bm;
3499 BlockDirtyInfoList *list = NULL;
3500 BlockDirtyInfoList **plist = &list;
3502 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3503 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3504 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3505 info->count = bdrv_get_dirty_count(bm);
3506 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3507 info->has_name = !!bm->name;
3508 info->name = g_strdup(bm->name);
3509 info->status = bdrv_dirty_bitmap_status(bm);
3510 entry->value = info;
3512 plist = &entry->next;
/*
 * Return the dirty state of @sector in @bitmap, as reported by
 * hbitmap_get().
 * NOTE(review): a guard around this return and the alternative result are
 * on lines not visible in this excerpt.
 */
3518 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3521 return hbitmap_get(bitmap->bitmap, sector);
3528 * Chooses a default granularity based on the existing cluster size,
3529 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3530 * is no cluster size information available.
/*
 * Pick a default dirty-bitmap granularity (in bytes) for @bs.
 * When bdrv_get_info() succeeds and reports a positive cluster size, that
 * size is clamped into [4096, 65536]; otherwise 65536 is used.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
3532 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3534 BlockDriverInfo bdi;
3535 uint32_t granularity;
3537 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3538 granularity = MAX(4096, bdi.cluster_size);
3539 granularity = MIN(65536, granularity);
3541 granularity = 65536;
/*
 * Return the granularity of @bitmap in bytes: BDRV_SECTOR_SIZE shifted
 * left by the value hbitmap_granularity() reports for the underlying
 * hbitmap.
 */
3547 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3549 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
/* Initialize @hbi to iterate over @bitmap's hbitmap starting at offset 0. */
3552 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3554 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
/*
 * Mark @nr_sectors sectors starting at @cur_sector as dirty in @bitmap.
 * The bitmap must be enabled (asserted).
 */
3557 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3558 int64_t cur_sector, int nr_sectors)
3560 assert(bdrv_dirty_bitmap_enabled(bitmap));
3561 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
/*
 * Clear the dirty marks of @nr_sectors sectors starting at @cur_sector in
 * @bitmap.  The bitmap must be enabled (asserted).
 */
3564 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3565 int64_t cur_sector, int nr_sectors)
3567 assert(bdrv_dirty_bitmap_enabled(bitmap));
3568 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
/*
 * Clear every dirty mark in @bitmap via hbitmap_reset_all().  The bitmap
 * must be enabled (asserted).
 */
3571 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3573 assert(bdrv_dirty_bitmap_enabled(bitmap));
3574 hbitmap_reset_all(bitmap->bitmap);
/*
 * Mark the sector range starting at @cur_sector as dirty in the dirty
 * bitmaps of @bs.  Bitmaps that are not enabled are tested for inside the
 * loop.
 * NOTE(review): the statement taken for a non-enabled bitmap is not
 * visible in this excerpt; presumably it skips that bitmap -- confirm.
 */
3577 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3580 BdrvDirtyBitmap *bitmap;
3581 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3582 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3585 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3590 * Advance an HBitmapIter to an arbitrary offset.
/*
 * Reposition @hbi at @offset by re-initializing it on the hbitmap it is
 * already attached to (hbi->hb).
 * NOTE(review): a line preceding the call is not visible in this excerpt.
 */
3592 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3595 hbitmap_iter_init(hbi, hbi->hb, offset);
/* Return hbitmap_count() of the hbitmap underlying @bitmap. */
3598 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3600 return hbitmap_count(bitmap->bitmap);
3603 /* Get a reference to bs */
/*
 * NOTE(review): only the signature is visible in this excerpt.  Presumably
 * increments bs->refcnt, the counter bdrv_unref() decrements -- confirm
 * against the full file.
 */
3604 void bdrv_ref(BlockDriverState *bs)
3609 /* Release a previously grabbed reference to bs.
3610 * If after releasing, reference count is zero, the BlockDriverState is
/*
 * Drop one reference to @bs.  The reference count must be positive on
 * entry (asserted); it is decremented, and reaching zero triggers a
 * further action.
 * NOTE(review): that action and any guard on a NULL @bs are on lines not
 * visible in this excerpt.
 */
3612 void bdrv_unref(BlockDriverState *bs)
3617 assert(bs->refcnt > 0);
3618 if (--bs->refcnt == 0) {
/*
 * One entry in a per-operation blocker list (bs->op_blockers[op]), linked
 * through 'list'.
 * NOTE(review): the other members are not visible in this excerpt; the
 * functions that use this type access a 'reason' member.
 */
3623 struct BdrvOpBlocker {
3625 QLIST_ENTRY(BdrvOpBlocker) list;
/*
 * Check whether operation @op is currently blocked on @bs.
 * @op must be within [0, BLOCK_OP_TYPE_MAX) (asserted).  When the blocker
 * list for @op is non-empty, the first blocker's reason is used to build
 * the error "Node '<name>' is busy: <reason>".
 * NOTE(review): the return statements and the guard around error_setg()
 * are not visible in this excerpt.
 */
3628 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3630 BdrvOpBlocker *blocker;
3631 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3632 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3633 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3635 error_setg(errp, "Node '%s' is busy: %s",
3636 bdrv_get_device_or_node_name(bs),
3637 error_get_pretty(blocker->reason));
/*
 * Block operation @op on @bs for @reason: a new BdrvOpBlocker holding the
 * @reason pointer (not a copy) is inserted at the head of the blocker list
 * for @op.  @op must be within [0, BLOCK_OP_TYPE_MAX) (asserted).
 */
3644 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3646 BdrvOpBlocker *blocker;
3647 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3649 blocker = g_new0(BdrvOpBlocker, 1);
3650 blocker->reason = reason;
3651 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
/*
 * Remove from the blocker list of @op every blocker whose reason pointer
 * equals @reason (pointer identity, not message contents).  The _SAFE
 * iterator is used because entries are unlinked during the walk.  @op must
 * be within [0, BLOCK_OP_TYPE_MAX) (asserted).
 * NOTE(review): the freeing of the removed entry is not visible here.
 */
3654 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3656 BdrvOpBlocker *blocker, *next;
3657 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3658 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3659 if (blocker->reason == reason) {
3660 QLIST_REMOVE(blocker, list);
/* Block every operation type (0 .. BLOCK_OP_TYPE_MAX - 1) on @bs for @reason. */
3666 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3669 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3670 bdrv_op_block(bs, i, reason);
/*
 * Remove the blockers registered with @reason from every operation type
 * (0 .. BLOCK_OP_TYPE_MAX - 1) of @bs.
 */
3674 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3677 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3678 bdrv_op_unblock(bs, i, reason);
/*
 * Scan the blocker lists of all operation types of @bs, testing each for
 * being non-empty.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably false as soon as any list is non-empty, true otherwise.
 */
3682 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3686 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3687 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
/*
 * Enable I/O status tracking for @bs and start from
 * BLOCK_DEVICE_IO_STATUS_OK.
 */
3694 void bdrv_iostatus_enable(BlockDriverState *bs)
3696 bs->iostatus_enabled = true;
3697 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3700 /* The I/O status is only enabled if the drive explicitly
3701 * enables it _and_ the VM is configured to stop on errors */
/*
 * Return true when I/O status tracking was enabled on @bs and at least one
 * error policy stops the VM: on_write_error is ENOSPC or STOP, or
 * on_read_error is STOP.
 */
3702 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3704 return (bs->iostatus_enabled &&
3705 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3706 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
3707 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
/* Turn off I/O status tracking for @bs. */
3710 void bdrv_iostatus_disable(BlockDriverState *bs)
3712 bs->iostatus_enabled = false;
/*
 * When I/O status tracking is effectively enabled for @bs, reset the
 * status to BLOCK_DEVICE_IO_STATUS_OK and also reset the I/O status of the
 * block job attached to @bs.
 * NOTE(review): any guard on bs->job being NULL is not visible here.
 */
3715 void bdrv_iostatus_reset(BlockDriverState *bs)
3717 if (bdrv_iostatus_is_enabled(bs)) {
3718 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3720 block_job_iostatus_reset(bs->job);
/*
 * Record an I/O error on @bs.  I/O status tracking must be enabled
 * (asserted).  Only the first error sticks: the status changes only while
 * it is still OK, becoming NOSPACE for ENOSPC and FAILED for anything
 * else.  @error is a positive errno value.
 */
3725 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3727 assert(bdrv_iostatus_is_enabled(bs));
3728 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3729 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3730 BLOCK_DEVICE_IO_STATUS_FAILED;
/*
 * Create a new image @filename of format @fmt, reporting errors via @errp.
 *
 * Outline of the visible steps:
 *  1. Resolve the format driver (bdrv_find_format) and the protocol driver
 *     (bdrv_find_protocol); both must provide create_opts.
 *  2. Merge both option lists, create the option set and seed
 *     BLOCK_OPT_SIZE with @img_size.
 *  3. Parse the user's @options string, then apply @base_filename and
 *     @base_fmt as the backing file / backing format options.
 *  4. Reject a backing file whose name equals @filename, and look up the
 *     driver for the backing format.
 *  5. Read the size option; on the backing-file path the backing image is
 *     opened (with BDRV_O_RDWR, BDRV_O_SNAPSHOT and BDRV_O_NO_BACKING
 *     masked out of @flags) and its length is stored as the size.
 *     Otherwise the error "Image creation needs a size parameter" applies.
 *  6. Print the "Formatting ..." line with the effective options and call
 *     bdrv_create().  An -EFBIG result gets a dedicated "image size is too
 *     large" message, with a cluster-size hint when a cluster size option
 *     is set.
 *  7. Free the options and option list and propagate a pending local error.
 *
 * NOTE(review): many guard conditions, goto/cleanup labels and the use of
 * @quiet are on lines not visible in this excerpt.
 */
3734 void bdrv_img_create(const char *filename, const char *fmt,
3735 const char *base_filename, const char *base_fmt,
3736 char *options, uint64_t img_size, int flags,
3737 Error **errp, bool quiet)
3739 QemuOptsList *create_opts = NULL;
3740 QemuOpts *opts = NULL;
3741 const char *backing_fmt, *backing_file;
3743 BlockDriver *drv, *proto_drv;
3744 BlockDriver *backing_drv = NULL;
3745 Error *local_err = NULL;
3748 /* Find driver and parse its options */
3749 drv = bdrv_find_format(fmt);
3751 error_setg(errp, "Unknown file format '%s'", fmt);
3755 proto_drv = bdrv_find_protocol(filename, true, errp);
3760 if (!drv->create_opts) {
3761 error_setg(errp, "Format driver '%s' does not support image creation",
3766 if (!proto_drv->create_opts) {
3767 error_setg(errp, "Protocol driver '%s' does not support image creation",
3768 proto_drv->format_name);
3772 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3773 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3775 /* Create parameter list with default values */
3776 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3777 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3779 /* Parse -o options */
3781 qemu_opts_do_parse(opts, options, NULL, &local_err);
3783 error_report_err(local_err);
3785 error_setg(errp, "Invalid options for file format '%s'", fmt);
3790 if (base_filename) {
3791 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3793 error_setg(errp, "Backing file not supported for file format '%s'",
3800 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3802 error_setg(errp, "Backing file format not supported for file "
3803 "format '%s'", fmt);
3808 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3810 if (!strcmp(filename, backing_file)) {
3811 error_setg(errp, "Error: Trying to create an image with the "
3812 "same filename as the backing file");
3817 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3819 backing_drv = bdrv_find_format(backing_fmt);
3821 error_setg(errp, "Unknown backing file format '%s'",
3827 // The size for the image must always be specified, with one exception:
3828 // If we are using a backing file, we can obtain the size from there
3829 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3832 BlockDriverState *bs;
3833 char *full_backing = g_new0(char, PATH_MAX);
3837 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3838 full_backing, PATH_MAX,
3841 g_free(full_backing);
3845 /* backing files always opened read-only */
3847 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3850 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
3851 backing_drv, &local_err);
3852 g_free(full_backing);
3856 size = bdrv_getlength(bs);
3858 error_setg_errno(errp, -size, "Could not get size of '%s'",
3864 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3868 error_setg(errp, "Image creation needs a size parameter");
3874 printf("Formatting '%s', fmt=%s", filename, fmt);
3875 qemu_opts_print(opts, " ");
3879 ret = bdrv_create(drv, filename, opts, &local_err);
3881 if (ret == -EFBIG) {
3882 /* This is generally a better message than whatever the driver would
3883 * deliver (especially because of the cluster_size_hint), since that
3884 * is most probably not much different from "image too large". */
3885 const char *cluster_size_hint = "";
3886 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3887 cluster_size_hint = " (try using a larger cluster size)";
3889 error_setg(errp, "The image size is too large for file format '%s'"
3890 "%s", fmt, cluster_size_hint);
3891 error_free(local_err);
3896 qemu_opts_del(opts);
3897 qemu_opts_free(create_opts);
3899 error_propagate(errp, local_err);
/* Return the AioContext currently stored in @bs (bs->aio_context). */
3903 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3905 return bs->aio_context;
/*
 * Detach @bs from its current AioContext.
 *
 * Order of the visible steps: registered notifiers get their
 * detach_aio_context callback; throttle timers are detached when I/O
 * limits are enabled; the driver's bdrv_detach_aio_context callback runs
 * if present; the call recurses into bs->file and bs->backing_hd; finally
 * bs->aio_context is cleared.
 * NOTE(review): the early guards and the guard around the bs->file
 * recursion are not visible in this excerpt.
 */
3908 void bdrv_detach_aio_context(BlockDriverState *bs)
3910 BdrvAioNotifier *baf;
3916 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3917 baf->detach_aio_context(baf->opaque);
3920 if (bs->io_limits_enabled) {
3921 throttle_timers_detach_aio_context(&bs->throttle_timers);
3923 if (bs->drv->bdrv_detach_aio_context) {
3924 bs->drv->bdrv_detach_aio_context(bs);
3927 bdrv_detach_aio_context(bs->file);
3929 if (bs->backing_hd) {
3930 bdrv_detach_aio_context(bs->backing_hd);
3933 bs->aio_context = NULL;
/*
 * Attach @bs to @new_context; the steps mirror bdrv_detach_aio_context()
 * in reverse order.
 *
 * bs->aio_context is set first; the call then recurses into bs->backing_hd
 * and bs->file; the driver's bdrv_attach_aio_context callback runs if
 * present; throttle timers are attached when I/O limits are enabled; and
 * finally every registered notifier's attached_aio_context callback is
 * invoked.
 * NOTE(review): the early guards and the guard around the bs->file
 * recursion are not visible in this excerpt.
 */
3936 void bdrv_attach_aio_context(BlockDriverState *bs,
3937 AioContext *new_context)
3939 BdrvAioNotifier *ban;
3945 bs->aio_context = new_context;
3947 if (bs->backing_hd) {
3948 bdrv_attach_aio_context(bs->backing_hd, new_context);
3951 bdrv_attach_aio_context(bs->file, new_context);
3953 if (bs->drv->bdrv_attach_aio_context) {
3954 bs->drv->bdrv_attach_aio_context(bs, new_context);
3956 if (bs->io_limits_enabled) {
3957 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3960 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3961 ban->attached_aio_context(new_context, ban->opaque);
/*
 * Move @bs to @new_context: drain in-flight requests, detach from the
 * current AioContext, then attach to @new_context while holding it
 * (acquire / attach / release).
 */
3965 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3967 bdrv_drain(bs); /* ensure there are no in-flight requests */
3969 bdrv_detach_aio_context(bs);
3971 /* This function executes in the old AioContext so acquire the new one in
3972 * case it runs in a different thread.
3974 aio_context_acquire(new_context);
3975 bdrv_attach_aio_context(bs, new_context);
3976 aio_context_release(new_context);
/*
 * Register a pair of callbacks to be told when @bs is attached to or
 * detached from an AioContext.  A BdrvAioNotifier is allocated, filled in
 * from the arguments and inserted at the head of bs->aio_notifiers.
 * NOTE(review): the initializer line storing @opaque is not visible here.
 */
3979 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3980 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3981 void (*detach_aio_context)(void *opaque), void *opaque)
3983 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3984 *ban = (BdrvAioNotifier){
3985 .attached_aio_context = attached_aio_context,
3986 .detach_aio_context = detach_aio_context,
3990 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
/*
 * Unregister a notifier previously added with the same
 * (attached_aio_context, detach_aio_context, opaque) triple: the matching
 * entry is unlinked from bs->aio_notifiers.
 * NOTE(review): the freeing of the entry and the behaviour when no entry
 * matches are on lines not visible in this excerpt.
 */
3993 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3994 void (*attached_aio_context)(AioContext *,
3996 void (*detach_aio_context)(void *),
3999 BdrvAioNotifier *ban, *ban_next;
4001 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4002 if (ban->attached_aio_context == attached_aio_context &&
4003 ban->detach_aio_context == detach_aio_context &&
4004 ban->opaque == opaque)
4006 QLIST_REMOVE(ban, list);
/*
 * Amend the image options of @bs by delegating to the driver's
 * bdrv_amend_options callback, passing @opts and @status_cb through and
 * returning the callback's result.
 * NOTE(review): the value returned when the driver lacks the callback is
 * not visible in this excerpt.
 */
4016 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
4017 BlockDriverAmendStatusCB *status_cb)
4019 if (!bs->drv->bdrv_amend_options) {
4022 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
4025 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4026 * of block filter and by bdrv_is_first_non_filter.
4027 * It is used to test if the given bs is the candidate or recurse more in the
/*
 * Decide whether @candidate is the first non-filter node reachable from
 * @bs.  A non-filter driver terminates the recursion with the comparison
 * bs == candidate; a filter driver delegates to its own
 * bdrv_recurse_is_first_non_filter callback when it has one.
 * NOTE(review): the returns for the failed basic checks and for a filter
 * without the callback are on lines not visible in this excerpt.
 */
4030 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4031 BlockDriverState *candidate)
4033 /* return false if basic checks fails */
4034 if (!bs || !bs->drv) {
4038 /* the code reached a non block filter driver -> check if the bs is
4039 * the same as the candidate. It's the recursion termination condition.
4041 if (!bs->drv->is_filter) {
4042 return bs == candidate;
4044 /* Down this path the driver is a block filter driver */
4046 /* If the block filter recursion method is defined use it to recurse down
4049 if (bs->drv->bdrv_recurse_is_first_non_filter) {
4050 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4053 /* the driver is a block filter but don't allow to recurse -> return false
4058 /* This function checks if the candidate is the first non filter bs down its
4059 * bs chain. Since we don't have pointers to parents it explores all bs chains
4060 * from the top. Some filters can choose not to pass down the recursion.
/*
 * Run bdrv_recurse_is_first_non_filter() from every top-level
 * BlockDriverState in bdrv_states, looking for @candidate.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably true as soon as one recursion succeeds, false otherwise.
 */
4062 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4064 BlockDriverState *bs;
4066 /* walk down the bs forest recursively */
4067 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4070 /* try to recurse in this top level bs */
4071 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4073 /* candidate is the first non filter */
/*
 * Validate that the node called @node_name may be replaced and return it,
 * or NULL with @errp set.
 *
 * The node must exist, must not have BLOCK_OP_TYPE_REPLACE blocked, and
 * must be the top-most non-filter node of its chain.  The op-blocker and
 * filter checks run with the node's AioContext held; the context is
 * released before returning.
 * NOTE(review): the goto/label lines that route the failure paths to the
 * release call are not visible in this excerpt.
 */
4082 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
4084 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4085 AioContext *aio_context;
4087 if (!to_replace_bs) {
4088 error_setg(errp, "Node name '%s' not found", node_name);
4092 aio_context = bdrv_get_aio_context(to_replace_bs);
4093 aio_context_acquire(aio_context);
4095 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4096 to_replace_bs = NULL;
4100 /* We don't want arbitrary node of the BDS chain to be replaced only the top
4101 * most non filter in order to prevent data corruption.
4102 * Another benefit is that this tests exclude backing files which are
4103 * blocked by the backing blockers.
4105 if (!bdrv_is_first_non_filter(to_replace_bs)) {
4106 error_setg(errp, "Only top most non filter can be replaced");
4107 to_replace_bs = NULL;
4112 aio_context_release(aio_context);
4113 return to_replace_bs;
/*
 * Copy into @d the entries of bs->options that belong to this level: keys
 * without a '.' and other than "node-name".  Each copied value gets an
 * extra reference (qobject_incref) because qdict_put_obj() stores the same
 * object in @d.
 * NOTE(review): the update of 'found_any' and the return statement are
 * not visible in this excerpt; presumably returns whether anything was
 * copied.
 */
4116 static bool append_open_options(QDict *d, BlockDriverState *bs)
4118 const QDictEntry *entry;
4119 bool found_any = false;
4121 for (entry = qdict_first(bs->options); entry;
4122 entry = qdict_next(bs->options, entry))
4124 /* Only take options for this level and exclude all non-driver-specific
4126 if (!strchr(qdict_entry_key(entry), '.') &&
4127 strcmp(qdict_entry_key(entry), "node-name"))
4129 qobject_incref(qdict_entry_value(entry));
4130 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4138 /* Updates the following BDS fields:
4139 * - exact_filename: A filename which may be used for opening a block device
4140 * which (mostly) equals the given BDS (even without any
4141 * other options; so reading and writing must return the same
4142 * results, but caching etc. may be different)
4143 * - full_open_options: Options which, when given when opening a block device
4144 * (without a filename), result in a BDS (mostly)
4145 * equalling the given one
4146 * - filename: If exact_filename is set, it is copied here. Otherwise,
4147 * full_open_options is converted to a JSON object, prefixed with
4148 * "json:" (for use through the JSON pseudo protocol) and put here.
/*
 * Recompute bs->exact_filename, bs->full_open_options and bs->filename.
 *
 * Structure of the visible code:
 *  - The underlying file (bs->file) is refreshed first, recursively.
 *  - Driver with its own bdrv_refresh_filename callback: the stale
 *    exact_filename and full_open_options are dropped, then the callback
 *    is invoked.
 *  - Otherwise, with an underlying file: the stale values are dropped;
 *    exact_filename is inherited from the file when it has one and this
 *    level has no options of its own; full_open_options is rebuilt as this
 *    level's options plus "driver" and a "file" entry referencing the
 *    file's full options (with an extra reference taken).
 *  - Otherwise, when full_open_options is unset and bs->options is
 *    non-empty: full_open_options is built from this level's options plus
 *    "driver" and, if known, "filename".
 *  - Finally bs->filename becomes exact_filename when set, else "json:"
 *    followed by the JSON form of full_open_options.
 *
 * NOTE(review): the allocation of 'opts', several closing/else lines and
 * the release of the temporary JSON string are not visible here.
 */
4150 void bdrv_refresh_filename(BlockDriverState *bs)
4152 BlockDriver *drv = bs->drv;
4159 /* This BDS's file name will most probably depend on its file's name, so
4160 * refresh that first */
4162 bdrv_refresh_filename(bs->file);
4165 if (drv->bdrv_refresh_filename) {
4166 /* Obsolete information is of no use here, so drop the old file name
4167 * information before refreshing it */
4168 bs->exact_filename[0] = '\0';
4169 if (bs->full_open_options) {
4170 QDECREF(bs->full_open_options);
4171 bs->full_open_options = NULL;
4174 drv->bdrv_refresh_filename(bs);
4175 } else if (bs->file) {
4176 /* Try to reconstruct valid information from the underlying file */
4177 bool has_open_options;
4179 bs->exact_filename[0] = '\0';
4180 if (bs->full_open_options) {
4181 QDECREF(bs->full_open_options);
4182 bs->full_open_options = NULL;
4186 has_open_options = append_open_options(opts, bs);
4188 /* If no specific options have been given for this BDS, the filename of
4189 * the underlying file should suffice for this one as well */
4190 if (bs->file->exact_filename[0] && !has_open_options) {
4191 strcpy(bs->exact_filename, bs->file->exact_filename);
4193 /* Reconstructing the full options QDict is simple for most format block
4194 * drivers, as long as the full options are known for the underlying
4195 * file BDS. The full options QDict of that file BDS should somehow
4196 * contain a representation of the filename, therefore the following
4197 * suffices without querying the (exact_)filename of this BDS. */
4198 if (bs->file->full_open_options) {
4199 qdict_put_obj(opts, "driver",
4200 QOBJECT(qstring_from_str(drv->format_name)));
4201 QINCREF(bs->file->full_open_options);
4202 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
4204 bs->full_open_options = opts;
4208 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4209 /* There is no underlying file BDS (at least referenced by BDS.file),
4210 * so the full options QDict should be equal to the options given
4211 * specifically for this block device when it was opened (plus the
4212 * driver specification).
4213 * Because those options don't change, there is no need to update
4214 * full_open_options when it's already set. */
4217 append_open_options(opts, bs);
4218 qdict_put_obj(opts, "driver",
4219 QOBJECT(qstring_from_str(drv->format_name)));
4221 if (bs->exact_filename[0]) {
4222 /* This may not work for all block protocol drivers (some may
4223 * require this filename to be parsed), but we have to find some
4224 * default solution here, so just include it. If some block driver
4225 * does not support pure options without any filename at all or
4226 * needs some special format of the options QDict, it needs to
4227 * implement the driver-specific bdrv_refresh_filename() function.
4229 qdict_put_obj(opts, "filename",
4230 QOBJECT(qstring_from_str(bs->exact_filename)));
4233 bs->full_open_options = opts;
4236 if (bs->exact_filename[0]) {
4237 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4238 } else if (bs->full_open_options) {
4239 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4240 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4241 qstring_get_str(json));
4246 /* This accessor function purpose is to allow the device models to access the
4247 * BlockAcctStats structure embedded inside a BlockDriverState without being
4248 * aware of the BlockDriverState structure layout.
4249 * It will go away when the BlockAcctStats structure will be moved inside
4250 * the device models.
4252 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)